#####################Read and Pre-Clean the Data#######################
library(psych) #to describe
## Warning: package 'psych' was built under R version 3.5.1
library(reticulate) #to use Python in R as well
## Warning: package 'reticulate' was built under R version 3.5.1
# Load the exam results, drop the two identifying name columns, and show the
# resulting structure.
# stringsAsFactors = TRUE is spelled out because read.csv()'s default became
# FALSE in R 4.0.0; the str() listing that follows shows Gender and Ethnicity
# as factors, and this keeps that true on any R version.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
mydata <- read.csv(
  "C:/Users/lfult/Desktop/Peregrine Analysis/mha.csv",
  stringsAsFactors = TRUE
)
mydata$First.Name <- NULL
mydata$Last.Name <- NULL
str(mydata)
## 'data.frame': 16 obs. of 28 variables:
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 2 1 2 1 2 2 ...
## $ Ethnicity : Factor w/ 3 levels "A","C","H": 2 3 1 1 2 3 1 2 1 1 ...
## $ Vision : int 70 90 80 80 70 60 100 80 90 90 ...
## $ Comm : int 90 80 70 80 90 80 90 80 90 80 ...
## $ ContToCommProf : int 40 70 80 90 60 100 90 90 80 60 ...
## $ Fac_Negot : int 50 80 80 50 60 60 70 70 90 80 ...
## $ Fin : int 40 70 50 80 60 90 50 60 60 80 ...
## $ Mgt : int 60 40 70 70 80 80 80 80 90 80 ...
## $ Pers : int 70 80 70 80 100 100 80 90 100 80 ...
## $ SysOrg : int 40 70 90 90 70 70 70 90 60 80 ...
## $ HRM : int 80 40 90 80 80 70 60 70 80 90 ...
## $ IM : int 80 50 70 40 60 60 90 70 90 80 ...
## $ Ldrship : int 70 80 90 90 70 100 90 90 60 70 ...
## $ ChangeMgt : int 80 80 60 90 90 50 80 100 100 80 ...
## $ ClimCult : int 90 70 40 70 80 80 90 90 90 70 ...
## $ DynGov : int 70 50 50 70 80 80 70 80 100 100 ...
## $ PersProfAcc : int 60 60 70 90 90 60 80 70 40 70 ...
## $ LifelongLearning: int 70 80 70 60 80 90 80 80 90 90 ...
## $ QI : int 70 70 40 70 70 70 80 80 70 70 ...
## $ RelMgt : int 80 80 90 100 80 80 60 80 80 90 ...
## $ RiskMgt : int 90 70 80 50 80 70 100 70 60 70 ...
## $ Strat_Mark : int 60 90 70 70 70 60 60 80 70 80 ...
## $ Comm_Envir : int 50 70 80 50 90 90 50 30 80 70 ...
## $ Legal : int 50 70 90 80 70 90 90 70 80 80 ...
## $ PatientPersp : int 80 100 80 80 70 90 80 90 60 90 ...
## $ Score : num 67 71.3 72.2 74.3 76.1 ...
## $ Duration : num 147 100 161 189 150 ...
## $ Percent : int 53 68 71 77 81 84 85 85 87 88 ...
#########################################################################
I manually coded the variable “Minority Group” using categories that differ from the Census Bureau's: {C = Caucasian / non-Hispanic, B = African American or associated minority group / non-Hispanic, A = Asian / other, H = Hispanic, regardless of whether the primary classification would be C or B}. I used this coding scheme to reflect that we are a Hispanic-serving institution. Eventually, I will categorize race and ethnicity separately using Census coding.
#############################Descriptives 1##############################
# Descriptive statistics for columns 3 through 28, i.e. everything after the
# Gender and Ethnicity columns.
psych::describe(mydata[, seq(3, 28)])
## vars n mean sd median trimmed mad min max
## Vision 1 16 81.88 11.09 80.00 82.14 14.83 60.00 100.00
## Comm 2 16 83.75 7.19 80.00 83.57 0.00 70.00 100.00
## ContToCommProf 3 16 81.25 17.08 90.00 82.86 14.83 40.00 100.00
## Fac_Negot 4 16 72.50 13.42 75.00 72.86 14.83 50.00 90.00
## Fin 5 16 70.62 15.69 75.00 71.43 22.24 40.00 90.00
## Mgt 6 16 77.50 14.38 80.00 78.57 7.41 40.00 100.00
## Pers 7 16 82.50 16.53 80.00 84.29 14.83 40.00 100.00
## SysOrg 8 16 76.25 14.55 75.00 77.14 7.41 40.00 100.00
## HRM 9 16 73.75 15.44 80.00 74.29 14.83 40.00 100.00
## IM 10 16 71.25 15.86 70.00 72.14 14.83 40.00 90.00
## Ldrship 11 16 80.62 11.24 80.00 80.71 14.83 60.00 100.00
## ChangeMgt 12 16 79.38 16.11 80.00 80.00 14.83 50.00 100.00
## ClimCult 13 16 80.00 14.61 85.00 81.43 7.41 40.00 100.00
## DynGov 14 16 76.25 14.08 80.00 76.43 14.83 50.00 100.00
## PersProfAcc 15 16 75.00 15.49 75.00 75.71 22.24 40.00 100.00
## LifelongLearning 16 16 81.88 10.47 80.00 82.14 14.83 60.00 100.00
## QI 17 16 72.50 11.83 70.00 73.57 7.41 40.00 90.00
## RelMgt 18 16 83.12 11.38 80.00 83.57 14.83 60.00 100.00
## RiskMgt 19 16 76.25 15.00 70.00 76.43 14.83 50.00 100.00
## Strat_Mark 20 16 74.38 11.53 75.00 73.57 7.41 60.00 100.00
## Comm_Envir 21 16 71.25 18.57 75.00 72.86 22.24 30.00 90.00
## Legal 22 16 80.00 13.17 80.00 80.71 14.83 50.00 100.00
## PatientPersp 23 16 83.75 10.25 80.00 84.29 14.83 60.00 100.00
## Score 24 16 77.63 4.56 78.25 77.98 3.87 66.95 83.47
## Duration 25 16 147.76 38.30 149.03 147.85 45.64 89.28 204.90
## Percent 26 16 83.00 11.02 86.00 84.36 7.41 53.00 94.00
## range skew kurtosis se
## Vision 40.00 -0.07 -0.84 2.77
## Comm 30.00 0.41 -0.31 1.80
## ContToCommProf 60.00 -0.86 -0.25 4.27
## Fac_Negot 40.00 -0.27 -1.30 3.35
## Fin 50.00 -0.39 -1.21 3.92
## Mgt 60.00 -0.73 0.83 3.59
## Pers 60.00 -0.79 0.21 4.13
## SysOrg 60.00 -0.60 0.16 3.64
## HRM 60.00 -0.49 -0.46 3.86
## IM 50.00 -0.38 -1.12 3.97
## Ldrship 40.00 -0.11 -1.31 2.81
## ChangeMgt 50.00 -0.44 -0.93 4.03
## ClimCult 60.00 -1.08 0.90 3.65
## DynGov 50.00 -0.17 -0.41 3.52
## PersProfAcc 60.00 -0.40 -0.56 3.87
## LifelongLearning 40.00 -0.35 -0.80 2.62
## QI 50.00 -0.91 1.27 2.96
## RelMgt 40.00 -0.07 -0.78 2.85
## RiskMgt 50.00 0.27 -1.01 3.75
## Strat_Mark 40.00 0.39 -0.63 2.88
## Comm_Envir 60.00 -0.64 -0.82 4.64
## Legal 50.00 -0.66 -0.46 3.29
## PatientPersp 40.00 -0.38 -0.15 2.56
## Score 16.52 -0.74 -0.33 1.14
## Duration 115.62 -0.15 -1.44 9.57
## Percent 41.00 -1.30 0.93 2.75
# Base-graphics settings for the plots drawn later: 0.3-inch margins on all
# four sides, then a 5-row by 4-column panel grid.
par(mai = rep(0.3, 4))
par(mfrow = c(5, 4))
# Head count per gender.
table(mydata[["Gender"]])
##
## F M
## 5 11
# Head count per ethnicity code.
table(mydata[["Ethnicity"]])
##
## A C H
## 8 5 3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Overlaid histograms of Score (3 bins), filled by Ethnicity, with one facet
# column per Gender.
gf <- ggplot(data = mydata, mapping = aes(x = Score, fill = Ethnicity)) +
  geom_histogram(
    position = "identity",
    colour = "grey40",
    alpha = 0.2,
    bins = 3
  ) +
  facet_grid(. ~ Gender)
print(gf)
# Draw a histogram and a horizontal boxplot for each numeric column
# (columns 3-28 of mydata).
subdata <- mydata[, 3:28]
for (i in seq_along(subdata)) {
  # One title per variable, shared by both plots. The boxplot title used to
  # index colnames(mydata), whose first two entries are Gender and Ethnicity,
  # so every boxplot was labelled two columns off from the data it showed.
  panel_title <- colnames(subdata)[i]
  hist(
    subdata[, i],
    ylim = c(0, 15), xlim = c(0, 100),
    xlab = NULL, ylab = NULL, main = panel_title
  )
  # NOTE(review): the fixed 0-100 axis clips Duration, whose values run up to
  # about 205 according to the describe() output.
  boxplot(
    subdata[, i],
    horizontal = TRUE, ylim = c(0, 100),
    xlab = NULL, ylab = NULL, main = panel_title
  )
}
# Mean of every numeric column (3-28) within each gender group.
myagg <- aggregate(
  x = mydata[, 3:28],
  by = list(mydata$Gender),
  FUN = mean
)
print(myagg)
## Group.1 Vision Comm ContToCommProf Fac_Negot Fin Mgt
## 1 F 80.00000 84.00000 88.00000 74.00000 82.00000 84.00000
## 2 M 82.72727 83.63636 78.18182 71.81818 65.45455 74.54545
## Pers SysOrg HRM IM Ldrship ChangeMgt ClimCult DynGov
## 1 92.00000 76.00000 70.00000 70.00000 84.00000 74.00000 86.00000 78.00000
## 2 78.18182 76.36364 75.45455 71.81818 79.09091 81.81818 77.27273 75.45455
## PersProfAcc LifelongLearning QI RelMgt RiskMgt Strat_Mark
## 1 78.00000 82.00000 80.00000 84.00000 74.00000 80.00000
## 2 73.63636 81.81818 69.09091 82.72727 77.27273 71.81818
## Comm_Envir Legal PatientPersp Score Duration Percent
## 1 74 84.00000 90.00000 80.34200 123.8700 89.20000
## 2 70 78.18182 80.90909 76.39727 158.6164 80.18182
# Mean of every numeric column (3-28) within each ethnicity group.
myagg2 <- aggregate(
  x = mydata[, 3:28],
  by = list(mydata$Ethnicity),
  FUN = mean
)
print(myagg2)
## Group.1 Vision Comm ContToCommProf Fac_Negot Fin Mgt Pers
## 1 A 85 82.5 86.25000 75.00000 68.75000 80.00000 76.25000
## 2 C 78 88.0 70.00000 68.00000 66.00000 80.00000 92.00000
## 3 H 80 80.0 86.66667 73.33333 83.33333 66.66667 83.33333
## SysOrg HRM IM Ldrship ChangeMgt ClimCult DynGov PersProfAcc
## 1 82.5 78.75000 75.00000 81.25000 81.25 76.25 78.75 75
## 2 70.0 72.00000 76.00000 76.00000 82.00 86.00 76.00 78
## 3 70.0 63.33333 53.33333 86.66667 70.00 80.00 70.00 70
## LifelongLearning QI RelMgt RiskMgt Strat_Mark Comm_Envir
## 1 83.75000 68.75000 83.75000 76.25 71.25000 70.00000
## 2 76.00000 78.00000 82.00000 80.00 78.00000 66.00000
## 3 86.66667 73.33333 83.33333 70.00 76.66667 83.33333
## Legal PatientPersp Score Duration Percent
## 1 83.75000 80.00000 78.25500 168.6012 85.00000
## 2 72.00000 84.00000 77.12400 128.0700 80.80000
## 3 83.33333 93.33333 76.80667 124.9900 81.33333
#########################################################################
# Cross-check the descriptives with pandas, reached through reticulate.
# NOTE(review): presumably reticulate converts the R data frame to a pandas
# DataFrame when it is passed in -- confirm.
pd <- import(module = "pandas")
df <- mydata
pd$DataFrame$describe(df)
## Vision Comm ContToCommProf Fac_Negot Fin Mgt
## count 16.00000 16.000000 16.00000 16.00000 16.00000 16.00000
## mean 81.87500 83.750000 81.25000 72.50000 70.62500 77.50000
## std 11.08678 7.187953 17.07825 13.41641 15.69235 14.37591
## min 60.00000 70.000000 40.00000 50.00000 40.00000 40.00000
## 25% 77.50000 80.000000 70.00000 60.00000 60.00000 70.00000
## 50% 80.00000 80.000000 90.00000 75.00000 75.00000 80.00000
## 75% 90.00000 90.000000 90.00000 80.00000 80.00000 80.00000
## max 100.00000 100.000000 100.00000 90.00000 90.00000 100.00000
## Pers SysOrg HRM IM Ldrship ChangeMgt ClimCult
## count 16.0000 16.00000 16.00000 16.00000 16.0000 16.00000 16.00000
## mean 82.5000 76.25000 73.75000 71.25000 80.6250 79.37500 80.00000
## std 16.5328 14.54877 15.43805 15.86401 11.2361 16.11159 14.60593
## min 40.0000 40.00000 40.00000 40.00000 60.0000 50.00000 40.00000
## 25% 70.0000 70.00000 67.50000 60.00000 70.0000 70.00000 70.00000
## 50% 80.0000 75.00000 80.00000 70.00000 80.0000 80.00000 85.00000
## 75% 100.0000 90.00000 80.00000 82.50000 90.0000 90.00000 90.00000
## max 100.0000 100.00000 100.00000 90.00000 100.0000 100.00000 100.00000
## DynGov PersProfAcc LifelongLearning QI RelMgt RiskMgt
## count 16.00000 16.00000 16.00000 16.00000 16.00000 16.00
## mean 76.25000 75.00000 81.87500 72.50000 83.12500 76.25
## std 14.08309 15.49193 10.46821 11.83216 11.38347 15.00
## min 50.00000 40.00000 60.00000 40.00000 60.00000 50.00
## 25% 70.00000 67.50000 77.50000 70.00000 80.00000 70.00
## 50% 80.00000 75.00000 80.00000 70.00000 80.00000 70.00
## 75% 80.00000 90.00000 90.00000 80.00000 90.00000 82.50
## max 100.00000 100.00000 100.00000 90.00000 100.00000 100.00
## Strat_Mark Comm_Envir Legal PatientPersp Score Duration
## count 16.00000 16.00000 16.00000 16.00000 16.000000 16.00000
## mean 74.37500 71.25000 80.00000 83.75000 77.630000 147.75813
## std 11.52895 18.57418 13.16561 10.24695 4.562184 38.29905
## min 60.00000 30.00000 50.00000 60.00000 66.950000 89.28000
## 25% 67.50000 57.50000 70.00000 80.00000 75.645000 117.78500
## 50% 75.00000 75.00000 80.00000 80.00000 78.255000 149.02500
## 75% 80.00000 90.00000 90.00000 90.00000 80.647500 177.68000
## max 100.00000 90.00000 100.00000 100.00000 83.470000 204.90000
## Percent
## count 16.00000
## mean 83.00000
## std 11.01514
## min 53.00000
## 25% 80.00000
## 50% 86.00000
## 75% 90.25000
## max 94.00000
The final score is a linear combination of the other scores, and the percentile rank is derived from it. Percentile rank is therefore omitted from the correlation analysis, and Spearman’s rank correlation is used to avoid the normality assumption required by Pearson’s. While the final score is retained, it is necessarily correlated with the subscores because it is built from them.
A scatterplot illustrates that Duration is not related to overall final-score performance. There are, however, some localized associations between Duration and individual subscores.
#############################Descriptives 2##############################
# Spearman rank correlations among the retained columns, rounded to two
# decimals. Columns 1, 2, 26 and 28 (Gender, Ethnicity, Score and Percent in
# the str() listing) are excluded: the final score is by definition a linear
# combination of the other scores, and the percent rank follows from it.
mycor <- round(
  cor(mydata[, -c(1, 2, 26, 28)], method = "spearman"),
  digits = 2
)
# Blank the upper triangle so each pair of variables appears only once.
mycor[upper.tri(mycor)] <- NA
# Spearman rank-correlation test between the Pers subscore and Duration.
# The call is left exactly as written because the printed "data:" line and
# the ties warning both echo the argument expressions.
cor.test(mydata$Pers,mydata$Duration, method="spearman")
## Warning in cor.test.default(mydata$Pers, mydata$Duration, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: mydata$Pers and mydata$Duration
## S = 1007, p-value = 0.05933
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.4808996
# Spearman rank-correlation test between the Legal and Mgt subscores.
# The call is left exactly as written because the printed "data:" line and
# the ties warning both echo the argument expressions.
cor.test(mydata$Legal, mydata$Mgt, method="spearman")
## Warning in cor.test.default(mydata$Legal, mydata$Mgt, method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: mydata$Legal and mydata$Mgt
## S = 329.8, p-value = 0.04121
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5150059
library(reshape2)
# Reshape the correlation matrix to long form (Var1, Var2, value) for ggplot.
meltcor <- melt(mycor)
library(ggplot2)
# Heatmap of the correlation matrix on a fixed blue-white-red scale spanning
# -1 to 1, with square tiles and slanted x-axis labels.
ggplot(data = meltcor, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    # mycor is computed with method = "spearman", so the legend must not
    # call it a Pearson correlation.
    name = "Spearman\nCorrelation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1)
  ) +
  coord_fixed()
# Score against Duration with a fitted straight line and its confidence band.
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth(method = "lm")
# Same plot with the confidence band switched off.
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
# Same plot with geom_smooth()'s default smoother (reported as loess in the
# message that follows).
ggplot(data = mydata, mapping = aes(x = Duration, y = Score)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#########################################################################
######################Basic Inferentials########################
# Two-sample t-test of final Score by Gender (Welch's test, since t.test()
# defaults to var.equal = FALSE).
myt <- t.test(mydata$Score ~ mydata$Gender)
# Print the test itself: summary() on an htest object only lists the length
# and mode of its components, so on its own it never shows the statistic,
# p-value or confidence interval.
print(myt)
summary(myt)
## Length Class Mode
## statistic 1 -none- numeric
## parameter 1 -none- numeric
## p.value 1 -none- numeric
## conf.int 2 -none- numeric
## estimate 2 -none- numeric
## null.value 1 -none- numeric
## alternative 1 -none- character
## method 1 -none- character
## data.name 1 -none- character
# One-way ANOVA of final Score across Ethnicity groups. The formula is kept
# in its mydata$... form because the printed row labels are taken from it.
myaov <- aov(formula = mydata$Score ~ mydata$Ethnicity)
print(summary(myaov))
## Df Sum Sq Mean Sq F value Pr(>F)
## mydata$Ethnicity 2 6.44 3.219 0.137 0.873
## Residuals 13 305.76 23.520
# Tukey honest-significant-difference pairwise comparisons for the ANOVA fit.
mytukeys <- TukeyHSD(x = myaov)
print(mytukeys)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = mydata$Score ~ mydata$Ethnicity)
##
## $`mydata$Ethnicity`
## diff lwr upr p adj
## C-A -1.1310000 -8.431271 6.169271 0.9124937
## H-A -1.4483333 -10.117713 7.221047 0.8991042
## H-C -0.3173333 -9.669165 9.034498 0.9955855
# Plot the Tukey pairwise-difference confidence intervals.
plot(x = mytukeys)
################################################################