Peregrine Overall Results

Pre-Clean

#####################Read and Pre-Clean the Data#######################
require(psych) #to describe

## Loading required package: psych

## Warning: package 'psych' was built under R version 3.5.1

require(reticulate) #to use Python in R as well

## Loading required package: reticulate

## Warning: package 'reticulate' was built under R version 3.5.1

mydata=read.csv("C:/Users/lfult/OneDrive - Texas State University/BHA2/Peregrine Analysis/combinedposttest.csv")
mydata[,1:2]=NULL
str(mydata)

## 'data.frame':    45 obs. of  19 variables:
##  $ Gender    : Factor w/ 2 levels "F","M": 1 2 1 1 1 1 1 1 1 1 ...
##  $ Ethnicity : Factor w/ 4 levels "A","B","C","H": 3 2 4 4 4 4 3 3 3 2 ...
##  $ Cohort    : num  2018 2018 2018 2018 2018 ...
##  $ Fin       : int  60 50 60 60 50 80 70 40 70 60 ...
##  $ Mgt       : int  70 60 50 70 70 80 100 70 100 70 ...
##  $ Pers      : int  70 80 70 50 60 70 80 80 80 70 ...
##  $ SysOrg    : int  80 50 90 60 70 50 40 60 80 60 ...
##  $ IM        : int  40 60 40 60 60 90 50 50 50 50 ...
##  $ Ldrship   : int  60 80 70 60 70 50 60 90 60 90 ...
##  $ ChangeMgt : int  40 40 70 80 60 60 90 60 40 90 ...
##  $ ClimCult  : int  80 80 80 80 100 70 70 90 90 70 ...
##  $ QI        : int  60 50 50 60 60 60 60 80 80 50 ...
##  $ Quant     : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Strat_Mark: int  70 70 70 70 80 60 40 60 70 90 ...
##  $ Comm_Envir: int  60 80 70 60 60 70 80 80 50 70 ...
##  $ Legal     : int  70 70 60 80 70 70 90 80 70 70 ...
##  $ Score     : num  63.3 64.2 65 65.8 67.5 ...
##  $ Duration  : num  66.7 71.2 57.8 74 53.9 ...
##  $ Percent   : int  58 61 64 67 72 72 77 79 79 79 ...

#########################################################################

Descriptive Statistics / Univariate Graphs / Crosstabs

I manually coded the variable “Minority Group” with non-Census Bureau categories of {C = caucasion / non-Hispanic, B = African American or Associated Minority Group / non-Hispanic, A = Asian / Other, H = Hispanic regardless of C or B Primary Classification}. I used this coding scheme to reflect that we are a Hispanic-serving institution. Eventually, I will categorize by race and ethnicity separately using Census coding.

#############################Descriptives 1##############################
describe(mydata[,4:ncol(mydata)])

##            vars  n  mean    sd median trimmed   mad   min    max range
## Fin           1 45 69.78 14.85  70.00   70.00 14.83 40.00 100.00 60.00
## Mgt           2 45 77.33 12.86  80.00   77.03 14.83 50.00 100.00 50.00
## Pers          3 45 78.89 11.91  80.00   78.92 14.83 50.00 100.00 50.00
## SysOrg        4 45 70.44 13.81  70.00   71.08 14.83 40.00 100.00 60.00
## IM            5 45 65.56 16.17  60.00   64.86 14.83 40.00 100.00 60.00
## Ldrship       6 45 75.78 13.40  80.00   76.49 14.83 50.00 100.00 50.00
## ChangeMgt     7 45 73.56 17.47  80.00   75.14 14.83 30.00 100.00 70.00
## ClimCult      8 45 78.67 11.98  80.00   78.65 14.83 50.00 100.00 50.00
## QI            9 45 71.11 14.34  70.00   71.35 14.83 40.00 100.00 60.00
## Quant        10 12 52.50 18.15  50.00   53.00 22.24 20.00  80.00 60.00
## Strat_Mark   11 45 73.56 14.95  70.00   73.51 14.83 40.00 100.00 60.00
## Comm_Envir   12 45 69.78 11.96  70.00   69.73 14.83 50.00  90.00 40.00
## Legal        13 45 76.44 11.31  80.00   76.49 14.83 50.00 100.00 50.00
## Score        14 45 73.00  5.27  73.07   73.06  5.69 62.30  85.83 23.53
## Duration     15 45 56.40 12.40  56.37   56.22 14.53 34.37  82.93 48.56
## Percent      16 45 83.47 11.17  86.00   84.73 10.38 54.00  98.00 44.00
##             skew kurtosis   se
## Fin        -0.17    -0.87 2.21
## Mgt        -0.01    -0.89 1.92
## Pers       -0.19    -0.44 1.78
## SysOrg     -0.43    -0.16 2.06
## IM          0.38    -0.57 2.41
## Ldrship    -0.33    -0.76 2.00
## ChangeMgt  -0.69    -0.46 2.60
## ClimCult   -0.14    -0.51 1.79
## QI         -0.05    -0.96 2.14
## Quant      -0.17    -1.25 5.24
## Strat_Mark -0.05    -0.69 2.23
## Comm_Envir -0.04    -0.89 1.78
## Legal      -0.03    -0.51 1.69
## Score      -0.04    -0.60 0.79
## Duration    0.12    -0.97 1.85
## Percent    -0.90    -0.10 1.67

par(mai=c(.3,.3,.3,.3))
par(mfrow=c(5,4))

table(mydata$Gender)

## 
##  F  M 
## 33 12

table(mydata$Ethnicity)

## 
##  A  B  C  H 
##  4  7 26  8

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.5.3

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

gf=ggplot(mydata, aes(x=Score, fill=Ethnicity))+
  geom_histogram(position="identity", colour="grey40", alpha=0.2, bins =5)+facet_grid(.~Gender)
gf

for (i in 4:ncol(mydata)){
  hist(na.omit(mydata[,i]), ylim=c(0,15), xlim=c(0,100),xlab=NULL, ylab=NULL, main=colnames(mydata)[i])
  boxplot(mydata[,i],horizontal=TRUE,xlab=NULL,ylim=c(0,100), ylab=NULL, main=colnames(mydata)[i])}

myagg=aggregate(mydata[,4:ncol(mydata)], by=list(mydata$Gender), mean)
myagg

##   Group.1      Fin      Mgt     Pers   SysOrg       IM  Ldrship ChangeMgt
## 1       F 68.18182 77.27273 78.18182 70.60606 64.24242 73.63636  75.75758
## 2       M 74.16667 77.50000 80.83333 70.00000 69.16667 81.66667  67.50000
##   ClimCult       QI Quant Strat_Mark Comm_Envir    Legal    Score Duration
## 1 77.57576 71.51515    NA   72.12121   67.27273 75.45455 72.14909 56.68636
## 2 81.66667 70.00000    NA   77.50000   76.66667 79.16667 75.32250 55.62750
##    Percent
## 1 81.81818
## 2 88.00000

myagg2=aggregate(mydata[,4:ncol(mydata)], by=list(mydata$Ethnicity), mean)
myagg2

##   Group.1      Fin      Mgt     Pers   SysOrg       IM  Ldrship ChangeMgt
## 1       A 62.50000 87.50000 72.50000 70.00000 75.00000 72.50000  72.50000
## 2       B 74.28571 71.42857 78.57143 70.00000 68.57143 81.42857  74.28571
## 3       C 70.00000 77.69231 83.07692 69.61538 64.61538 75.76923  75.38462
## 4       H 68.75000 76.25000 68.75000 73.75000 61.25000 72.50000  67.50000
##   ClimCult       QI Quant Strat_Mark Comm_Envir    Legal    Score Duration
## 1 70.00000 72.50000    NA   77.50000   67.50000 85.00000 72.93000 51.05500
## 2 80.00000 57.14286    NA   78.57143   74.28571 75.71429 73.10000 64.08571
## 3 79.61538 75.38462    NA   72.69231   70.00000 75.76923 73.74923 53.26000
## 4 78.75000 68.75000    NA   70.00000   66.25000 75.00000 70.48625 62.57500
##    Percent
## 1 85.00000
## 2 84.28571
## 3 84.53846
## 4 78.50000

myagg3=aggregate(mydata[,4:ncol(mydata)], by=list(mydata$Cohort), mean)
myagg3

##   Group.1      Fin      Mgt     Pers   SysOrg       IM Ldrship ChangeMgt
## 1  2018.2 70.00000 77.87879 80.30303 70.90909 65.45455 76.9697  73.93939
## 2  2019.1 69.16667 75.83333 75.00000 69.16667 65.83333 72.5000  72.50000
##   ClimCult       QI Quant Strat_Mark Comm_Envir    Legal   Score Duration
## 1       80 70.30303    NA    73.0303   70.00000 76.36364 73.7600 55.52727
## 2       75 73.33333  52.5    75.0000   69.16667 76.66667 70.8925 58.81500
##    Percent
## 1 84.84848
## 2 79.66667

#########################################################################

Correlations

The final score is a linear combination of the other scores, and percentile rank derives from that. Percentile rank is omitted for the correlation analysis, and Spearman’s is used to avoid assumptions of normality necessary for Pearson’s. While final score is retained, it must be correlated with subscores as it is built from them.

A scatterplot illustrates that Duration is not related to overall final grade performance. There are some localized associations between duration and subscores, though.

#############################Descriptives 2##############################
mycor=round(cor(mydata[,-c(1,2,3)], method="spearman"),2) 
#by definition, final is a linear combination of other scores
#% rank follows from that.
mycor[upper.tri(mycor)==TRUE]=NA
cor.test(mydata$Pers,mydata$Duration, method="spearman")

## Warning in cor.test.default(mydata$Pers, mydata$Duration, method =
## "spearman"): Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Pers and mydata$Duration
## S = 20847, p-value = 0.01155
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3732965

cor.test(mydata$Legal, mydata$Mgt, method="spearman")

## Warning in cor.test.default(mydata$Legal, mydata$Mgt, method = "spearman"):
## Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Legal and mydata$Mgt
## S = 11446, p-value = 0.1034
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2459801

library(reshape2)
meltcor=melt(mycor)

library(ggplot2)
ggplot(data = meltcor, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white") +
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 10, hjust = 1))+
 coord_fixed()

ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth(method=lm)

# Remove the confidence interval
ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth(method=lm, se=FALSE)

# Loess method
ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#########################################################################

Inference

######################Basic Inferentials########################
myt=t.test(mydata$Score~mydata$Gender)  
myt

## 
##  Welch Two Sample t-test
## 
## data:  mydata$Score by mydata$Gender
## t = -1.8751, df = 20.448, p-value = 0.07513
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.6986721  0.3518539
## sample estimates:
## mean in group F mean in group M 
##        72.14909        75.32250

myt2=t.test(mydata$Score~mydata$Cohort)  
myt2

## 
##  Welch Two Sample t-test
## 
## data:  mydata$Score by mydata$Cohort
## t = 1.8218, df = 24.221, p-value = 0.08086
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3794654  6.1144654
## sample estimates:
## mean in group 2018.2 mean in group 2019.1 
##              73.7600              70.8925

myaov=aov(mydata$Score~mydata$Ethnicity)
summary(myaov)

##                  Df Sum Sq Mean Sq F value Pr(>F)
## mydata$Ethnicity  3   65.2   21.75    0.77  0.518
## Residuals        41 1158.2   28.25

mytukeys=TukeyHSD(myaov)
mytukeys

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mydata$Score ~ mydata$Ethnicity)
## 
## $`mydata$Ethnicity`
##           diff        lwr      upr     p adj
## B-A  0.1700000  -8.750140 9.090140 0.9999515
## C-A  0.8192308  -6.824376 8.462837 0.9916370
## H-A -2.4437500 -11.158802 6.271302 0.8757869
## C-B  0.6492308  -5.410811 6.709273 0.9916474
## H-B -2.6137500  -9.979313 4.751813 0.7780422
## H-C -3.2629808  -9.016879 2.490917 0.4360406

plot(mytukeys)

################################################################

BHA Exit Exam

Doc Larry Fulton

5 April 2019

Peregrine Overall Results

Pre-Clean

Descriptive Statistics / Univariate Graphs / Crosstabs

Correlations

Inference