#####################Read and Pre-Clean the Data#######################
require(psych) #to describe
## Loading required package: psych
## Warning: package 'psych' was built under R version 3.5.3
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
## Warning: package 'reticulate' was built under R version 3.5.3
# Load the spring 2020 MHA data file.
# stringsAsFactors = TRUE is spelled out because, from R 4.0.0 onwards,
# read.csv() keeps text columns as character by default. The str() output
# below shows Gender and Ethnicity as factors, and TukeyHSD() further down
# expects a factor term in the fitted model.
mydata <- read.csv("C:/Users/lfult/OneDrive - Texas State University/MHA BHA BSHS Honors/Peregrine Analysis/spring2020MHA.csv", stringsAsFactors = TRUE)
# Discard the first two columns of the raw file; they are not used below.
mydata[, 1:2] <- NULL
str(mydata)
## 'data.frame': 13 obs. of 29 variables:
## $ Gender : Factor w/ 2 levels "F","M": 1 1 1 2 1 1 2 1 2 2 ...
## $ Ethnicity : Factor w/ 4 levels "B","C","H","O": 2 3 2 3 2 4 2 1 3 1 ...
## $ Vision : int 90 80 100 80 90 90 100 100 100 70 ...
## $ Commo : int 90 100 100 80 100 100 90 90 80 90 ...
## $ Community : int 80 80 90 80 90 70 100 90 80 80 ...
## $ Negot : int 100 80 70 80 70 100 100 90 90 70 ...
## $ Fin : int 90 70 90 80 50 60 70 70 70 80 ...
## $ GM : int 100 60 80 70 60 100 70 80 70 70 ...
## $ Personnel : int 80 80 80 90 80 100 80 80 90 100 ...
## $ Systems : int 70 90 90 90 50 90 80 90 80 90 ...
## $ HRM : int 80 80 90 80 60 80 60 90 60 50 ...
## $ HIM : int 80 80 80 60 70 90 90 80 100 70 ...
## $ Leadership : int 80 90 80 90 100 90 80 80 90 70 ...
## $ Change : int 90 80 90 90 90 60 90 100 100 70 ...
## $ Climate : int 80 90 70 70 90 90 90 90 80 80 ...
## $ Dynamics : int 70 50 60 60 90 100 80 70 90 70 ...
## $ Accountability: int 90 80 90 60 70 80 100 100 80 70 ...
## $ ProfDev : int 90 100 100 100 80 80 90 100 80 90 ...
## $ QI : int 70 70 90 80 90 90 70 80 60 70 ...
## $ Quant : int 40 40 70 30 50 80 40 60 30 60 ...
## $ Relationship : int 90 100 90 90 100 100 90 100 90 90 ...
## $ RM : int 80 90 100 90 90 80 80 80 70 80 ...
## $ Strategy : int 70 100 90 80 100 80 70 80 80 80 ...
## $ Environment : int 90 50 90 70 80 90 90 70 80 80 ...
## $ Legal : int 100 80 80 100 90 70 90 90 80 70 ...
## $ PatientPersp : int 90 80 100 80 80 90 80 100 80 70 ...
## $ Score : num 82.9 79.2 86.2 78.3 80 ...
## $ Duration : num 96.3 92.6 118.5 132.6 139.2 ...
## $ Rank : int 93 88 96 87 90 96 93 96 89 81 ...
#########################################################################
I manually coded the variable “Minority Group” (stored in the Ethnicity column) with non-Census Bureau categories of {C = Caucasian / non-Hispanic, B = African American or Associated Minority Group / non-Hispanic, O = Asian / Other, H = Hispanic regardless of C or B Primary Classification}. I used this coding scheme to reflect that we are a Hispanic-serving institution. Eventually, I will categorize by race and ethnicity separately using Census coding.
#############################Descriptives 1##############################
# Summary statistics for every column from the third onwards
# (columns 1 and 2 are Gender and Ethnicity).
describe(mydata[, seq(from = 3, to = ncol(mydata))])
## vars n mean sd median trimmed mad min max range
## Vision 1 13 89.23 9.54 90.00 90.00 14.83 70.00 100.00 30.00
## Commo 2 13 91.54 6.89 90.00 91.82 0.00 80.00 100.00 20.00
## Community 3 13 82.31 8.32 80.00 81.82 0.00 70.00 100.00 30.00
## Negot 4 13 84.62 11.27 80.00 84.55 14.83 70.00 100.00 30.00
## Fin 5 13 73.08 12.51 70.00 73.64 14.83 50.00 90.00 40.00
## GM 6 13 76.92 14.37 70.00 76.36 14.83 60.00 100.00 40.00
## Personnel 7 13 85.38 8.77 80.00 85.45 14.83 70.00 100.00 30.00
## Systems 8 13 80.00 12.25 80.00 81.82 14.83 50.00 90.00 40.00
## HRM 9 13 74.62 13.30 80.00 75.45 14.83 50.00 90.00 40.00
## HIM 10 13 75.38 19.84 80.00 78.18 14.83 20.00 100.00 80.00
## Leadership 11 13 83.85 8.70 80.00 83.64 14.83 70.00 100.00 30.00
## Change 12 13 84.62 11.98 90.00 85.45 14.83 60.00 100.00 40.00
## Climate 13 13 85.38 9.67 90.00 85.45 14.83 70.00 100.00 30.00
## Dynamics 14 13 72.31 15.36 70.00 71.82 14.83 50.00 100.00 50.00
## Accountability 15 13 81.54 12.14 80.00 81.82 14.83 60.00 100.00 40.00
## ProfDev 16 13 88.46 9.87 90.00 89.09 14.83 70.00 100.00 30.00
## QI 17 13 76.15 9.61 70.00 76.36 14.83 60.00 90.00 30.00
## Quant 18 13 46.92 17.97 40.00 46.36 14.83 20.00 80.00 60.00
## Relationship 19 13 90.77 8.62 90.00 91.82 0.00 70.00 100.00 30.00
## RM 20 13 85.38 8.77 80.00 85.45 14.83 70.00 100.00 30.00
## Strategy 21 13 83.85 9.61 80.00 83.64 14.83 70.00 100.00 30.00
## Environment 22 13 76.92 13.77 80.00 78.18 14.83 50.00 90.00 40.00
## Legal 23 13 86.15 9.61 90.00 86.36 14.83 70.00 100.00 30.00
## PatientPersp 24 13 85.38 9.67 80.00 85.45 0.00 70.00 100.00 30.00
## Score 25 13 80.86 3.73 80.00 80.94 3.71 74.58 86.25 11.67
## Duration 26 13 143.37 27.30 151.87 144.34 18.71 92.57 183.48 90.91
## Rank 27 13 89.85 5.40 90.00 90.27 4.45 79.00 96.00 17.00
## skew kurtosis se
## Vision -0.40 -1.03 2.65
## Commo -0.16 -1.09 1.91
## Community 0.41 -0.52 2.31
## Negot 0.09 -1.52 3.12
## Fin -0.07 -1.13 3.47
## GM 0.65 -1.14 3.98
## Personnel 0.23 -1.00 2.43
## Systems -1.00 0.08 3.40
## HRM -0.41 -1.34 3.69
## HIM -1.47 1.95 5.50
## Leadership -0.02 -0.99 2.41
## Change -0.59 -0.85 3.32
## Climate -0.10 -1.17 2.68
## Dynamics 0.15 -1.16 4.26
## Accountability -0.01 -1.17 3.37
## ProfDev -0.20 -1.32 2.74
## QI 0.22 -1.31 2.66
## Quant 0.27 -1.27 4.99
## Relationship -0.85 0.18 2.39
## RM 0.23 -1.00 2.43
## Strategy 0.30 -1.06 2.66
## Environment -0.55 -1.20 3.82
## Legal -0.30 -1.06 2.66
## PatientPersp 0.41 -1.25 2.68
## Score 0.01 -1.27 1.03
## Duration -0.56 -0.85 7.57
## Rank -0.60 -0.77 1.50
# Base-graphics layout for the plots drawn below: 0.3 margins on all four
# sides (mai) and a 5-row by 4-column grid of panels (mfrow).
par(mai = rep(0.3, 4), mfrow = c(5, 4))
# Number of rows in each Gender category.
table(mydata[["Gender"]])
##
## F M
## 8 5
# Number of rows in each Ethnicity category.
table(mydata[["Ethnicity"]])
##
## B C H O
## 2 5 3 3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Overlaid three-bin histograms of Score, filled by Ethnicity, with one
# facet column per Gender.
gf <- ggplot(data = mydata, mapping = aes(x = Score, fill = Ethnicity)) +
  geom_histogram(
    bins = 3,
    alpha = 0.2,
    colour = "grey40",
    position = "identity"
  ) +
  facet_grid(. ~ Gender)
gf
# Draw a histogram and a horizontal boxplot for each column from the third
# onwards (columns 1 and 2 are Gender and Ethnicity).
# The value axis used to be fixed at 0-100. That suits the subscores, but
# Duration runs well past 100 (see the describe() output above), so its
# plots were clipped. The axis now stays at 0-100 unless the column exceeds
# 100, in which case it widens to the next "pretty" tick above the maximum.
for (i in 3:ncol(mydata)) {
  values <- mydata[, i]
  var_name <- colnames(mydata)[i]
  upper <- max(100, values, na.rm = TRUE)
  axis_limits <- range(pretty(c(0, upper)))
  hist(values, ylim = c(0, 15), xlim = axis_limits, xlab = NULL, ylab = NULL,
       main = var_name)
  boxplot(values, horizontal = TRUE, xlab = NULL, ylim = axis_limits,
          ylab = NULL, main = var_name)
}
# Column means by Gender for every column from the third onwards.
# The grouping list is left unnamed, so the key column is called Group.1.
myagg <- aggregate(
  x = mydata[, 3:ncol(mydata)],
  by = list(mydata$Gender),
  FUN = mean
)
myagg
## Group.1 Vision Commo Community Negot Fin GM Personnel Systems HRM HIM
## 1 F 90 95 81.25 85 70 81.25 85 78.75 80 80
## 2 M 88 86 84.00 84 78 70.00 86 82.00 66 68
## Leadership Change Climate Dynamics Accountability ProfDev QI Quant
## 1 85 82.5 86.25 71.25 82.5 90 78.75 48.75
## 2 82 88.0 84.00 74.00 80.0 86 72.00 44.00
## Relationship RM Strategy Environment Legal PatientPersp Score Duration
## 1 93.75 86.25 87.5 73.75 86.25 90 82.0275 137.3363
## 2 86.00 84.00 78.0 82.00 86.00 78 78.9980 153.0280
## Rank
## 1 91.25
## 2 87.60
# Column means by Ethnicity for every column from the third onwards.
# The grouping list is left unnamed, so the key column is called Group.1.
myagg2 <- aggregate(
  x = mydata[, 3:ncol(mydata)],
  by = list(mydata$Ethnicity),
  FUN = mean
)
myagg2
## Group.1 Vision Commo Community Negot Fin GM Personnel
## 1 B 85.00000 90.00000 85.00000 80.00000 75.00000 75.00000 90.00000
## 2 C 94.00000 94.00000 88.00000 84.00000 78.00000 76.00000 78.00000
## 3 H 86.66667 86.66667 80.00000 83.33333 73.33333 66.66667 86.66667
## 4 O 86.66667 93.33333 73.33333 90.00000 63.33333 90.00000 93.33333
## Systems HRM HIM Leadership Change Climate Dynamics Accountability
## 1 90.00000 70.00000 75.00000 75.00000 85 85 70.00000 85.00000
## 2 72.00000 74.00000 68.00000 84.00000 90 86 74.00000 88.00000
## 3 86.66667 73.33333 80.00000 90.00000 90 80 66.66667 73.33333
## 4 80.00000 80.00000 83.33333 83.33333 70 90 76.66667 76.66667
## ProfDev QI Quant Relationship RM Strategy Environment
## 1 95.00000 75.00000 60.00000 95.00000 80.00000 80.00000 75.00000
## 2 86.00000 80.00000 52.00000 88.00000 90.00000 82.00000 88.00000
## 3 93.33333 70.00000 33.33333 93.33333 83.33333 86.66667 66.66667
## 4 83.33333 76.66667 43.33333 90.00000 83.33333 86.66667 70.00000
## Legal PatientPersp Score Duration Rank
## 1 80.00000 85 80.83000 155.9550 88.5
## 2 90.00000 86 82.08200 135.8500 92.0
## 3 86.66667 80 79.02333 126.2467 88.0
## 4 83.33333 90 80.69000 164.6433 89.0
#########################################################################
The final score is a linear combination of the other scores, and percentile rank is derived from it. Percentile rank is therefore omitted from the correlation analysis, and Spearman’s rank correlation is used to avoid the normality assumption required by Pearson’s. The final score is retained, but it is necessarily correlated with the subscores because it is built from them.
A scatterplot illustrates that Duration is not related to overall final grade performance. There are some localized associations between duration and subscores, though.
#############################Descriptives 2##############################
# All columns except Gender and Ethnicity.
testdata <- mydata[, -c(1, 2)]
# Percentile rank is derived from Score, so it is left out of the
# correlation analysis. testdata itself keeps the column.
cordata <- testdata[, setdiff(colnames(testdata), "Rank"), drop = FALSE]
# Spearman's rank correlation avoids the normality assumption behind
# Pearson's. The unrounded matrix is kept for plotting.
spearman_cor <- cor(cordata, method = "spearman")
mycor <- round(spearman_cor, 2)
# Blank the upper triangle so each pair of variables appears only once.
mycor[upper.tri(mycor)] <- NA
library(reshape2)
# Long format: one row per (variable, variable, correlation) triple.
meltcor <- melt(mycor)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.5.3
## corrplot 0.84 loaded
library(ggplot2)
# Plot the Spearman matrix computed above. The earlier call recomputed
# default (Pearson) correlations on data that still included Rank.
corrplot(spearman_cor)
# Score against Duration, with a fitted straight line (lm) drawn over the
# points.
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Same scatterplot and fitted line, this time without the confidence
# interval (se = FALSE).
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
# Same scatterplot with geom_smooth() left at its defaults; the message
# echoed below shows that it picks the loess method here.
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#########################################################################
######################Basic Inferentials########################
#myt=t.test(mydata$Score~mydata$Gender) all females
#myt
# One-way ANOVA of Score across the Ethnicity groups.
# The formula keeps the mydata$ prefixes so the term labels in the printed
# tables stay as shown below.
myaov <- aov(formula = mydata$Score ~ mydata$Ethnicity)
summary(myaov)
## Df Sum Sq Mean Sq F value Pr(>F)
## mydata$Ethnicity 3 17.67 5.892 0.355 0.787
## Residuals 9 149.17 16.575
# Tukey honest-significant-difference comparisons between every pair of
# Ethnicity groups, at the default 95% family-wise confidence level.
mytukeys <- TukeyHSD(myaov, conf.level = 0.95)
mytukeys
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = mydata$Score ~ mydata$Ethnicity)
##
## $`mydata$Ethnicity`
## diff lwr upr p adj
## C-B 1.252000 -9.381479 11.885479 0.9819659
## H-B -1.806667 -13.408743 9.795410 0.9602644
## O-B -0.140000 -11.742077 11.462077 0.9999793
## H-C -3.058667 -12.340328 6.222995 0.7377730
## O-C -1.392000 -10.673661 7.889661 0.9642117
## O-H 1.666667 -8.710546 12.043879 0.9567144
# Plot the pairwise confidence intervals held in the TukeyHSD result.
plot(x = mytukeys)
################################################################