Peregrine Overall Results

Pre-Clean

#####################Read and Pre-Clean the Data#######################
require(psych) #to describe

## Loading required package: psych

## Warning: package 'psych' was built under R version 3.5.3

require(reticulate) #to use Python in R as well

## Loading required package: reticulate

## Warning: package 'reticulate' was built under R version 3.5.3

mydata=read.csv("C:/Users/lf25/OneDrive - Texas State University/BHA2/Peregrine Analysis/spring2019posttest.csv")
mydata[,1:2]=NULL
str(mydata)

## 'data.frame':    12 obs. of  19 variables:
##  $ Gender    : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ethnicity : Factor w/ 4 levels "A","B","C","H": 3 1 4 4 3 3 3 2 4 3 ...
##  $ Fin       : int  70 50 80 50 60 70 90 90 90 70 ...
##  $ Mgt       : int  80 90 90 80 60 60 60 80 80 70 ...
##  $ Pers      : int  80 80 80 70 80 60 90 90 70 70 ...
##  $ SysOrg    : int  50 70 80 90 70 40 80 80 70 80 ...
##  $ IM        : int  50 70 70 50 70 90 60 80 60 40 ...
##  $ Ldrship   : int  70 80 80 80 50 90 60 70 80 70 ...
##  $ ChangeMgt : int  70 90 50 60 90 70 90 90 70 50 ...
##  $ ClimCult  : int  100 60 70 80 50 90 70 80 90 80 ...
##  $ QI        : int  80 60 100 70 60 80 60 70 80 70 ...
##  $ Quant     : int  20 30 80 70 40 70 40 50 60 50 ...
##  $ Strat_Mark: int  70 80 70 90 50 60 100 70 70 60 ...
##  $ Comm_Envir: int  70 70 60 50 50 70 70 60 80 70 ...
##  $ Legal     : int  70 70 90 80 80 80 70 80 80 70 ...
##  $ Score     : num  67.7 69.2 76.9 70.8 62.3 ...
##  $ Duration  : num  47.2 46.6 53.9 76.7 60.2 ...
##  $ Percent   : int  73 77 92 81 54 83 85 91 90 65 ...
##  $ Cohort    : num  2019 2019 2019 2019 2019 ...

#########################################################################

Descriptive Statistics / Univariate Graphs / Crosstabs

I manually coded the variable “Minority Group” with non-Census Bureau categories of {C = caucasion / non-Hispanic, B = African American or Associated Minority Group / non-Hispanic, A = Asian / Other, H = Hispanic regardless of C or B Primary Classification}. I used this coding scheme to reflect that we are a Hispanic-serving institution. Eventually, I will categorize by race and ethnicity separately using Census coding.

#############################Descriptives 1##############################
describe(mydata[,3:ncol(mydata)])

##            vars  n    mean    sd  median trimmed   mad    min     max
## Fin           1 12   69.17 16.76   70.00   70.00 22.24   40.0   90.00
## Mgt           2 12   75.83 13.79   80.00   75.00 14.83   60.0  100.00
## Pers          3 12   75.00 10.00   75.00   75.00  7.41   60.0   90.00
## SysOrg        4 12   69.17 14.43   70.00   70.00 14.83   40.0   90.00
## IM            5 12   65.83 14.43   70.00   66.00 14.83   40.0   90.00
## Ldrship       6 12   72.50 10.55   70.00   73.00 14.83   50.0   90.00
## ChangeMgt     7 12   72.50 14.85   70.00   73.00 22.24   50.0   90.00
## ClimCult      8 12   75.00 14.46   75.00   75.00 14.83   50.0  100.00
## QI            9 12   73.33 13.03   70.00   72.00 14.83   60.0  100.00
## Quant        10 12   52.50 18.15   50.00   53.00 22.24   20.0   80.00
## Strat_Mark   11 12   75.00 15.67   70.00   75.00 14.83   50.0  100.00
## Comm_Envir   12 12   69.17 13.11   70.00   69.00 14.83   50.0   90.00
## Legal        13 12   76.67 12.31   80.00   77.00 14.83   50.0  100.00
## Score        14 12   70.89  4.36   71.15   71.15  3.99   62.3   76.92
## Duration     15 12   58.81 13.51   58.56   59.02 19.39   38.9   76.72
## Percent      16 12   79.67 11.26   82.00   81.00  9.64   54.0   92.00
## Cohort       17 12 2019.10  0.00 2019.10 2019.10  0.00 2019.1 2019.10
##            range  skew kurtosis   se
## Fin        50.00 -0.20    -1.33 4.84
## Mgt        40.00  0.13    -1.44 3.98
## Pers       30.00  0.00    -1.27 2.89
## SysOrg     50.00 -0.53    -0.86 4.17
## IM         50.00 -0.15    -1.09 4.17
## Ldrship    40.00 -0.45    -0.40 3.05
## ChangeMgt  40.00 -0.09    -1.44 4.29
## ClimCult   50.00  0.00    -1.12 4.17
## QI         40.00  0.56    -0.93 3.76
## Quant      60.00 -0.17    -1.25 5.24
## Strat_Mark 50.00  0.26    -1.16 4.52
## Comm_Envir 40.00  0.14    -1.10 3.79
## Legal      50.00 -0.22     0.06 3.55
## Score      14.62 -0.37    -0.93 1.26
## Duration   37.82 -0.03    -1.65 3.90
## Percent    38.00 -0.90    -0.22 3.25
## Cohort      0.00   NaN      NaN 0.00

par(mai=c(.3,.3,.3,.3))
par(mfrow=c(5,4))

table(mydata$Gender)

## 
##  F  M 
## 11  1

table(mydata$Ethnicity)

## 
## A B C H 
## 1 2 6 3

library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

gf=ggplot(mydata, aes(x=Score, fill=Ethnicity))+
  geom_histogram(position="identity", colour="grey40", alpha=0.2, bins =3)+facet_grid(.~Gender)
gf

for (i in 3:ncol(mydata)){
  hist(mydata[,i], ylim=c(0,15), xlim=c(0,100),xlab=NULL, ylab=NULL, main=colnames(mydata)[i])
  boxplot(mydata[,i],horizontal=TRUE,xlab=NULL,ylim=c(0,100), ylab=NULL, main=colnames(mydata)[i])}

myagg=aggregate(mydata[,3:ncol(mydata)], by=list(mydata$Gender), mean)
myagg

##   Group.1      Fin      Mgt     Pers SysOrg       IM  Ldrship ChangeMgt
## 1       F 69.09091 77.27273 75.45455     70 64.54545 72.72727  72.72727
## 2       M 70.00000 60.00000 70.00000     60 80.00000 70.00000  70.00000
##   ClimCult       QI    Quant Strat_Mark Comm_Envir     Legal    Score
## 1 75.45455 74.54545 52.72727   72.72727   67.27273  74.54545 70.69455
## 2 70.00000 60.00000 50.00000  100.00000   90.00000 100.00000 73.07000
##   Duration  Percent Cohort
## 1 58.43636 79.09091 2019.1
## 2 62.98000 86.00000 2019.1

myagg2=aggregate(mydata[,3:ncol(mydata)], by=list(mydata$Ethnicity), mean)
myagg2

##   Group.1      Fin      Mgt     Pers   SysOrg       IM  Ldrship ChangeMgt
## 1       A 50.00000 90.00000 80.00000 70.00000 70.00000 80.00000  90.00000
## 2       B 80.00000 70.00000 80.00000 70.00000 80.00000 70.00000  80.00000
## 3       C 66.66667 71.66667 73.33333 63.33333 63.33333 68.33333  73.33333
## 4       H 73.33333 83.33333 73.33333 80.00000 60.00000 80.00000  60.00000
##   ClimCult       QI    Quant Strat_Mark Comm_Envir    Legal    Score
## 1       60 60.00000 30.00000   80.00000   70.00000 70.00000 69.23000
## 2       75 65.00000 50.00000   85.00000   75.00000 90.00000 74.61000
## 3       75 73.33333 48.33333   70.00000   70.00000 70.00000 68.20000
## 4       80 83.33333 70.00000   76.66667   63.33333 83.33333 74.35333
##   Duration  Percent Cohort
## 1   46.570 77.00000 2019.1
## 2   68.980 88.50000 2019.1
## 3   52.915 73.16667 2019.1
## 4   67.920 87.66667 2019.1

#########################################################################

Correlations

The final score is a linear combination of the other scores, and percentile rank derives from that. Percentile rank is omitted for the correlation analysis, and Spearman’s is used to avoid assumptions of normality necessary for Pearson’s. While final score is retained, it must be correlated with subscores as it is built from them.

A scatterplot illustrates that Duration is not related to overall final grade performance. There are some localized associations between duration and subscores, though.

#############################Descriptives 2##############################
mycor=round(cor(mydata[,-c(1,2)], method="spearman"),2)

## Warning in cor(mydata[, -c(1, 2)], method = "spearman"): the standard
## deviation is zero

mycor[upper.tri(mycor)==TRUE]=NA
cor.test(mydata$Pers,mydata$Duration, method="spearman")

## Warning in cor.test.default(mydata$Pers, mydata$Duration, method =
## "spearman"): Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Pers and mydata$Duration
## S = 356.78, p-value = 0.4381
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2474708

cor.test(mydata$Legal, mydata$Mgt, method="spearman")

## Warning in cor.test.default(mydata$Legal, mydata$Mgt, method = "spearman"):
## Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  mydata$Legal and mydata$Mgt
## S = 374.39, p-value = 0.3283
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.3090672

library(reshape2)
meltcor=melt(mycor)

library(ggplot2)
ggplot(data = meltcor, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white") +
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 10, hjust = 1))+
 coord_fixed()

ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth(method=lm)

# Remove the confidence interval
ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth(method=lm, se=FALSE)

# Loess method
ggplot(mydata, aes(x=Duration, y=Score)) + 
  geom_point()+
  geom_smooth()

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#########################################################################

Inference

######################Basic Inferentials########################
#myt=t.test(mydata$Score~mydata$Gender)  all females
#myt

myaov=aov(mydata$Score~mydata$Ethnicity)
summary(myaov)

##                  Df Sum Sq Mean Sq F value Pr(>F)  
## mydata$Ethnicity  3 109.83   36.61   2.945 0.0986 .
## Residuals         8  99.46   12.43                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mytukeys=TukeyHSD(myaov)
mytukeys

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = mydata$Score ~ mydata$Ethnicity)
## 
## $`mydata$Ethnicity`
##           diff        lwr       upr     p adj
## B-A  5.3800000  -8.448994 19.208994 0.6179501
## C-A -1.0300000 -13.226026 11.166026 0.9925366
## H-A  5.1233333  -7.914767 18.161434 0.6109402
## C-B -6.4100000 -15.629329  2.809329 0.1956629
## H-B -0.2566667 -10.564190 10.050857 0.9998021
## H-C  6.1533333  -1.830840 14.137507 0.1405245

plot(mytukeys)

################################################################

BHA Exit Exam

Doc Larry Fulton

5 April 2019

Peregrine Overall Results

Pre-Clean

Descriptive Statistics / Univariate Graphs / Crosstabs

Correlations

Inference