Peregrine Overall Results

#####################Read and Pre-Clean the Data#######################
library(psych) #to describe
## Warning: package 'psych' was built under R version 3.5.1
library(reticulate) #to use Python in R as well
## Warning: package 'reticulate' was built under R version 3.5.1
# Load the exam results. stringsAsFactors = TRUE is spelled out because the
# default became FALSE in R 4.0; Gender and Ethnicity must stay factors for the
# group comparisons later in the script (TukeyHSD() needs a factor).
# NOTE(review): absolute, machine-specific path -- confirm before running
# on another machine.
mydata <- read.csv("C:/Users/lfult/Desktop/Peregrine Analysis/mha.csv",
                   stringsAsFactors = TRUE)
# Drop the identifying name columns before anything is printed.
mydata$First.Name <- NULL
mydata$Last.Name <- NULL
str(mydata)
## 'data.frame':    16 obs. of  28 variables:
##  $ Gender          : Factor w/ 2 levels "F","M": 2 2 2 2 2 1 2 1 2 2 ...
##  $ Ethnicity       : Factor w/ 3 levels "A","C","H": 2 3 1 1 2 3 1 2 1 1 ...
##  $ Vision          : int  70 90 80 80 70 60 100 80 90 90 ...
##  $ Comm            : int  90 80 70 80 90 80 90 80 90 80 ...
##  $ ContToCommProf  : int  40 70 80 90 60 100 90 90 80 60 ...
##  $ Fac_Negot       : int  50 80 80 50 60 60 70 70 90 80 ...
##  $ Fin             : int  40 70 50 80 60 90 50 60 60 80 ...
##  $ Mgt             : int  60 40 70 70 80 80 80 80 90 80 ...
##  $ Pers            : int  70 80 70 80 100 100 80 90 100 80 ...
##  $ SysOrg          : int  40 70 90 90 70 70 70 90 60 80 ...
##  $ HRM             : int  80 40 90 80 80 70 60 70 80 90 ...
##  $ IM              : int  80 50 70 40 60 60 90 70 90 80 ...
##  $ Ldrship         : int  70 80 90 90 70 100 90 90 60 70 ...
##  $ ChangeMgt       : int  80 80 60 90 90 50 80 100 100 80 ...
##  $ ClimCult        : int  90 70 40 70 80 80 90 90 90 70 ...
##  $ DynGov          : int  70 50 50 70 80 80 70 80 100 100 ...
##  $ PersProfAcc     : int  60 60 70 90 90 60 80 70 40 70 ...
##  $ LifelongLearning: int  70 80 70 60 80 90 80 80 90 90 ...
##  $ QI              : int  70 70 40 70 70 70 80 80 70 70 ...
##  $ RelMgt          : int  80 80 90 100 80 80 60 80 80 90 ...
##  $ RiskMgt         : int  90 70 80 50 80 70 100 70 60 70 ...
##  $ Strat_Mark      : int  60 90 70 70 70 60 60 80 70 80 ...
##  $ Comm_Envir      : int  50 70 80 50 90 90 50 30 80 70 ...
##  $ Legal           : int  50 70 90 80 70 90 90 70 80 80 ...
##  $ PatientPersp    : int  80 100 80 80 70 90 80 90 60 90 ...
##  $ Score           : num  67 71.3 72.2 74.3 76.1 ...
##  $ Duration        : num  147 100 161 189 150 ...
##  $ Percent         : int  53 68 71 77 81 84 85 85 87 88 ...
#########################################################################

Descriptive Statistics / Univariate Graphs / Crosstabs

I manually coded the variable “Minority Group” (the Ethnicity column in the data) using non-Census Bureau categories: C = Caucasian / non-Hispanic; B = African American or associated minority group / non-Hispanic; A = Asian / other; H = Hispanic, regardless of whether the primary classification would be C or B. I used this coding scheme to reflect that we are a Hispanic-serving institution. Eventually, I will categorize race and ethnicity separately using Census coding.

#############################Descriptives 1##############################
# psych::describe summary for columns 3-28: the subscores plus Score,
# Duration and Percent (the two factor columns are skipped).
describe(mydata[, 3:28])
##                  vars  n   mean    sd median trimmed   mad   min    max
## Vision              1 16  81.88 11.09  80.00   82.14 14.83 60.00 100.00
## Comm                2 16  83.75  7.19  80.00   83.57  0.00 70.00 100.00
## ContToCommProf      3 16  81.25 17.08  90.00   82.86 14.83 40.00 100.00
## Fac_Negot           4 16  72.50 13.42  75.00   72.86 14.83 50.00  90.00
## Fin                 5 16  70.62 15.69  75.00   71.43 22.24 40.00  90.00
## Mgt                 6 16  77.50 14.38  80.00   78.57  7.41 40.00 100.00
## Pers                7 16  82.50 16.53  80.00   84.29 14.83 40.00 100.00
## SysOrg              8 16  76.25 14.55  75.00   77.14  7.41 40.00 100.00
## HRM                 9 16  73.75 15.44  80.00   74.29 14.83 40.00 100.00
## IM                 10 16  71.25 15.86  70.00   72.14 14.83 40.00  90.00
## Ldrship            11 16  80.62 11.24  80.00   80.71 14.83 60.00 100.00
## ChangeMgt          12 16  79.38 16.11  80.00   80.00 14.83 50.00 100.00
## ClimCult           13 16  80.00 14.61  85.00   81.43  7.41 40.00 100.00
## DynGov             14 16  76.25 14.08  80.00   76.43 14.83 50.00 100.00
## PersProfAcc        15 16  75.00 15.49  75.00   75.71 22.24 40.00 100.00
## LifelongLearning   16 16  81.88 10.47  80.00   82.14 14.83 60.00 100.00
## QI                 17 16  72.50 11.83  70.00   73.57  7.41 40.00  90.00
## RelMgt             18 16  83.12 11.38  80.00   83.57 14.83 60.00 100.00
## RiskMgt            19 16  76.25 15.00  70.00   76.43 14.83 50.00 100.00
## Strat_Mark         20 16  74.38 11.53  75.00   73.57  7.41 60.00 100.00
## Comm_Envir         21 16  71.25 18.57  75.00   72.86 22.24 30.00  90.00
## Legal              22 16  80.00 13.17  80.00   80.71 14.83 50.00 100.00
## PatientPersp       23 16  83.75 10.25  80.00   84.29 14.83 60.00 100.00
## Score              24 16  77.63  4.56  78.25   77.98  3.87 66.95  83.47
## Duration           25 16 147.76 38.30 149.03  147.85 45.64 89.28 204.90
## Percent            26 16  83.00 11.02  86.00   84.36  7.41 53.00  94.00
##                   range  skew kurtosis   se
## Vision            40.00 -0.07    -0.84 2.77
## Comm              30.00  0.41    -0.31 1.80
## ContToCommProf    60.00 -0.86    -0.25 4.27
## Fac_Negot         40.00 -0.27    -1.30 3.35
## Fin               50.00 -0.39    -1.21 3.92
## Mgt               60.00 -0.73     0.83 3.59
## Pers              60.00 -0.79     0.21 4.13
## SysOrg            60.00 -0.60     0.16 3.64
## HRM               60.00 -0.49    -0.46 3.86
## IM                50.00 -0.38    -1.12 3.97
## Ldrship           40.00 -0.11    -1.31 2.81
## ChangeMgt         50.00 -0.44    -0.93 4.03
## ClimCult          60.00 -1.08     0.90 3.65
## DynGov            50.00 -0.17    -0.41 3.52
## PersProfAcc       60.00 -0.40    -0.56 3.87
## LifelongLearning  40.00 -0.35    -0.80 2.62
## QI                50.00 -0.91     1.27 2.96
## RelMgt            40.00 -0.07    -0.78 2.85
## RiskMgt           50.00  0.27    -1.01 3.75
## Strat_Mark        40.00  0.39    -0.63 2.88
## Comm_Envir        60.00 -0.64    -0.82 4.64
## Legal             50.00 -0.66    -0.46 3.29
## PatientPersp      40.00 -0.38    -0.15 2.56
## Score             16.52 -0.74    -0.33 1.14
## Duration         115.62 -0.15    -1.44 9.57
## Percent           41.00 -1.30     0.93 2.75
# Base-graphics layout: 0.3-inch margin on every side, then a 5 x 4 grid of
# panels filled row by row.
par(mai = rep(0.3, 4))
par(mfrow = c(5, 4))

# Head-count per Gender level.
table(mydata[["Gender"]])
## 
##  F  M 
##  5 11
# Head-count per Ethnicity level.
table(mydata[["Ethnicity"]])
## 
## A C H 
## 8 5 3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Histograms of the final Score, filled by Ethnicity, one facet per Gender.
# position = "identity" overlays the groups instead of stacking them, which
# is why the fill is made mostly transparent.
gf <- ggplot(mydata, aes(x = Score, fill = Ethnicity)) +
  geom_histogram(
    position = "identity",
    colour = "grey40",
    alpha = 0.2,
    bins = 3
  ) +
  facet_grid(. ~ Gender)
gf

# Histogram and horizontal boxplot for every numeric column (columns 3-28).
subdata <- mydata[, 3:28]
for (i in seq_along(subdata)) {
  values <- subdata[[i]]
  # Take the title from subdata: mydata's names are offset by the two leading
  # factor columns, so colnames(mydata)[i] labels the wrong variable.
  label <- colnames(subdata)[i]
  # Subscores top out at 100, but Duration goes beyond that; widen the axis
  # when needed so no observations are clipped.
  axis_max <- max(100, values, na.rm = TRUE)
  hist(values, ylim = c(0, 15), xlim = c(0, axis_max),
       xlab = NULL, ylab = NULL, main = label)
  boxplot(values, horizontal = TRUE, xlab = NULL, ylim = c(0, axis_max),
          ylab = NULL, main = label)
}

# Mean of each numeric column (3-28) within every Gender level.
myagg <- aggregate(mydata[, 3:28], by = list(mydata$Gender), FUN = mean)
myagg
##   Group.1   Vision     Comm ContToCommProf Fac_Negot      Fin      Mgt
## 1       F 80.00000 84.00000       88.00000  74.00000 82.00000 84.00000
## 2       M 82.72727 83.63636       78.18182  71.81818 65.45455 74.54545
##       Pers   SysOrg      HRM       IM  Ldrship ChangeMgt ClimCult   DynGov
## 1 92.00000 76.00000 70.00000 70.00000 84.00000  74.00000 86.00000 78.00000
## 2 78.18182 76.36364 75.45455 71.81818 79.09091  81.81818 77.27273 75.45455
##   PersProfAcc LifelongLearning       QI   RelMgt  RiskMgt Strat_Mark
## 1    78.00000         82.00000 80.00000 84.00000 74.00000   80.00000
## 2    73.63636         81.81818 69.09091 82.72727 77.27273   71.81818
##   Comm_Envir    Legal PatientPersp    Score Duration  Percent
## 1         74 84.00000     90.00000 80.34200 123.8700 89.20000
## 2         70 78.18182     80.90909 76.39727 158.6164 80.18182
# Mean of each numeric column (3-28) within every Ethnicity level.
myagg2 <- aggregate(mydata[, 3:28], by = list(mydata$Ethnicity), FUN = mean)
myagg2
##   Group.1 Vision Comm ContToCommProf Fac_Negot      Fin      Mgt     Pers
## 1       A     85 82.5       86.25000  75.00000 68.75000 80.00000 76.25000
## 2       C     78 88.0       70.00000  68.00000 66.00000 80.00000 92.00000
## 3       H     80 80.0       86.66667  73.33333 83.33333 66.66667 83.33333
##   SysOrg      HRM       IM  Ldrship ChangeMgt ClimCult DynGov PersProfAcc
## 1   82.5 78.75000 75.00000 81.25000     81.25    76.25  78.75          75
## 2   70.0 72.00000 76.00000 76.00000     82.00    86.00  76.00          78
## 3   70.0 63.33333 53.33333 86.66667     70.00    80.00  70.00          70
##   LifelongLearning       QI   RelMgt RiskMgt Strat_Mark Comm_Envir
## 1         83.75000 68.75000 83.75000   76.25   71.25000   70.00000
## 2         76.00000 78.00000 82.00000   80.00   78.00000   66.00000
## 3         86.66667 73.33333 83.33333   70.00   76.66667   83.33333
##      Legal PatientPersp    Score Duration  Percent
## 1 83.75000     80.00000 78.25500 168.6012 85.00000
## 2 72.00000     84.00000 77.12400 128.0700 80.80000
## 3 83.33333     93.33333 76.80667 124.9900 81.33333
#########################################################################

# Cross-check the descriptives from Python: import pandas through reticulate
# and run DataFrame.describe() on a copy of the R data frame.
pd <- import("pandas")
df <- mydata
pd$DataFrame$describe(df)
##          Vision       Comm ContToCommProf Fac_Negot      Fin       Mgt
## count  16.00000  16.000000       16.00000  16.00000 16.00000  16.00000
## mean   81.87500  83.750000       81.25000  72.50000 70.62500  77.50000
## std    11.08678   7.187953       17.07825  13.41641 15.69235  14.37591
## min    60.00000  70.000000       40.00000  50.00000 40.00000  40.00000
## 25%    77.50000  80.000000       70.00000  60.00000 60.00000  70.00000
## 50%    80.00000  80.000000       90.00000  75.00000 75.00000  80.00000
## 75%    90.00000  90.000000       90.00000  80.00000 80.00000  80.00000
## max   100.00000 100.000000      100.00000  90.00000 90.00000 100.00000
##           Pers    SysOrg       HRM       IM  Ldrship ChangeMgt  ClimCult
## count  16.0000  16.00000  16.00000 16.00000  16.0000  16.00000  16.00000
## mean   82.5000  76.25000  73.75000 71.25000  80.6250  79.37500  80.00000
## std    16.5328  14.54877  15.43805 15.86401  11.2361  16.11159  14.60593
## min    40.0000  40.00000  40.00000 40.00000  60.0000  50.00000  40.00000
## 25%    70.0000  70.00000  67.50000 60.00000  70.0000  70.00000  70.00000
## 50%    80.0000  75.00000  80.00000 70.00000  80.0000  80.00000  85.00000
## 75%   100.0000  90.00000  80.00000 82.50000  90.0000  90.00000  90.00000
## max   100.0000 100.00000 100.00000 90.00000 100.0000 100.00000 100.00000
##          DynGov PersProfAcc LifelongLearning       QI    RelMgt RiskMgt
## count  16.00000    16.00000         16.00000 16.00000  16.00000   16.00
## mean   76.25000    75.00000         81.87500 72.50000  83.12500   76.25
## std    14.08309    15.49193         10.46821 11.83216  11.38347   15.00
## min    50.00000    40.00000         60.00000 40.00000  60.00000   50.00
## 25%    70.00000    67.50000         77.50000 70.00000  80.00000   70.00
## 50%    80.00000    75.00000         80.00000 70.00000  80.00000   70.00
## 75%    80.00000    90.00000         90.00000 80.00000  90.00000   82.50
## max   100.00000   100.00000        100.00000 90.00000 100.00000  100.00
##       Strat_Mark Comm_Envir     Legal PatientPersp     Score  Duration
## count   16.00000   16.00000  16.00000     16.00000 16.000000  16.00000
## mean    74.37500   71.25000  80.00000     83.75000 77.630000 147.75813
## std     11.52895   18.57418  13.16561     10.24695  4.562184  38.29905
## min     60.00000   30.00000  50.00000     60.00000 66.950000  89.28000
## 25%     67.50000   57.50000  70.00000     80.00000 75.645000 117.78500
## 50%     75.00000   75.00000  80.00000     80.00000 78.255000 149.02500
## 75%     80.00000   90.00000  90.00000     90.00000 80.647500 177.68000
## max    100.00000   90.00000 100.00000    100.00000 83.470000 204.90000
##        Percent
## count 16.00000
## mean  83.00000
## std   11.01514
## min   53.00000
## 25%   80.00000
## 50%   86.00000
## 75%   90.25000
## max   94.00000

Correlations

The final score is a linear combination of the other scores, and percentile rank derives from that. Both are therefore omitted from the correlation analysis: a retained final score would necessarily correlate with the subscores it is built from. Spearman’s rank correlation is used to avoid the normality assumption that Pearson’s requires.

A scatterplot illustrates that Duration is not related to overall final grade performance. There are some localized associations between duration and subscores, though.

#############################Descriptives 2##############################
# Spearman correlation matrix, rounded to two decimals. Dropped columns:
# 1 = Gender, 2 = Ethnicity, 26 = Score, 28 = Percent. Score is by definition
# a linear combination of the other scores, and the percent rank follows
# from it.
mycor <- round(cor(mydata[, -c(1, 2, 26, 28)], method = "spearman"), 2)
# Blank the upper triangle so each pair of variables is kept only once.
mycor[upper.tri(mycor)] <- NA
# Rank correlation between the Pers subscore and Duration.
cor.test(mydata$Pers, mydata$Duration, method = "spearman")
## Warning in cor.test.default(mydata$Pers, mydata$Duration, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Pers and mydata$Duration
## S = 1007, p-value = 0.05933
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4808996
# Rank correlation between the Legal and Mgt subscores.
cor.test(mydata$Legal, mydata$Mgt, method = "spearman")
## Warning in cor.test.default(mydata$Legal, mydata$Mgt, method = "spearman"):
## Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Legal and mydata$Mgt
## S = 329.8, p-value = 0.04121
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.5150059
# Reshape the correlation matrix to long form (Var1, Var2, value) so it can
# be drawn as tiles.
library(reshape2)
meltcor <- melt(mycor)

library(ggplot2)
# Heatmap of the lower-triangle correlations. mycor is computed with
# method = "spearman", so the legend is labelled Spearman (it previously said
# Pearson). `limits` is spelled out in full rather than relying on partial
# argument matching of `limit`.
ggplot(data = meltcor, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "blue", high = "red", mid = "white",
    midpoint = 0, limits = c(-1, 1), space = "Lab",
    name = "Spearman\nCorrelation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                   size = 10, hjust = 1)) +
  coord_fixed()

# Duration vs. final Score: one scatterplot drawn three times, each with a
# different smoother layered on top.
duration_score <- ggplot(mydata, aes(x = Duration, y = Score)) +
  geom_point()

# Linear fit with its confidence interval
duration_score + geom_smooth(method = lm)

# Remove the confidence interval
duration_score + geom_smooth(method = lm, se = FALSE)

# Loess method (what geom_smooth() picks here when no method is given)
duration_score + geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#########################################################################

Inference

######################Basic Inferentials########################
# Two-sample t-test of Score by Gender (t.test() defaults, i.e. Welch's
# unequal-variance test).
myt <- t.test(mydata$Score ~ mydata$Gender)
# Print the htest object itself: summary() on it only lists the length, class
# and mode of each component, not the statistic, df, p-value or interval.
myt
##             Length Class  Mode     
## statistic   1      -none- numeric  
## parameter   1      -none- numeric  
## p.value     1      -none- numeric  
## conf.int    2      -none- numeric  
## estimate    2      -none- numeric  
## null.value  1      -none- numeric  
## alternative 1      -none- character
## method      1      -none- character
## data.name   1      -none- character
# One-way ANOVA of Score across the Ethnicity groups.
myaov <- aov(mydata$Score ~ mydata$Ethnicity)
summary(myaov)
##                  Df Sum Sq Mean Sq F value Pr(>F)
## mydata$Ethnicity  2   6.44   3.219   0.137  0.873
## Residuals        13 305.76  23.520
# Tukey HSD pairwise comparisons between the groups of the fitted ANOVA.
mytukeys <- TukeyHSD(myaov)
mytukeys
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mydata$Score ~ mydata$Ethnicity)
## 
## $`mydata$Ethnicity`
##           diff        lwr      upr     p adj
## C-A -1.1310000  -8.431271 6.169271 0.9124937
## H-A -1.4483333 -10.117713 7.221047 0.8991042
## H-C -0.3173333  -9.669165 9.034498 0.9955855
# Draw the Tukey HSD result (one interval per pairwise difference).
plot(x = mytukeys)

################################################################