Processing math: 100%
  • Load Data
  • Describe Data
  • Imputation
  • Plots
    • Total Score
    • Color
    • Score by Gender
    • Scores ~ Underrepresented
    • Scores ~ True Color
    • Scores ~ Function Pair Differences
    • Scores ~ Num Matches
    • Linear Model, Total Score
    • Linear Model 2, Part 1 Score
    • Linear Model 2, Score 2

Load Data

library(psych)
## Warning: package 'psych' was built under R version 4.0.5
# Read the study data from the project folder; text columns become factors.
# NOTE(review): the working directory is a hard-coded absolute path, so this
# only runs on a machine with the same folder layout.
setwd("D:/kim graphs")
compdata <- read.csv("kim study.csv", stringsAsFactors = TRUE)

# Make sure Gender is a factor, then relabel its two levels in their existing
# order (the first level becomes "Female", the second "Male").
compdata[["Gender"]] <- as.factor(compdata[["Gender"]])
levels(compdata[["Gender"]]) <- c("Female", "Male")

Describe Data

# Descriptive statistics (psych::describe) for every column except the first.
describe(compdata[, 2:ncol(compdata)])
##              vars   n   mean    sd median trimmed   mad  min   max range  skew
## Diversity*      1 111   1.61  0.49   2.00    1.64  0.00  1.0   2.0   1.0 -0.46
## Gender*         2 111   1.32  0.47   1.00    1.27  0.00  1.0   2.0   1.0  0.78
## Exam_1          3 111  78.69  6.68  79.20   79.29  5.49 45.3  92.0  46.7 -1.94
## Exam_2          4 111  76.15 10.66  76.90   76.97  9.19 45.3  95.6  50.3 -0.81
## Total_Score     5 111 154.84 14.88 156.00  156.24 12.75 90.6 178.2  87.6 -1.36
## True_Color*     6 111   2.43  1.33   2.00    2.29  1.48  1.0   5.0   4.0  0.67
## Green           7 111   0.15  0.36   0.00    0.07  0.00  0.0   1.0   1.0  1.90
## Blue            8 111   0.32  0.47   0.00    0.27  0.00  0.0   1.0   1.0  0.78
## Gold            9 111   0.34  0.48   0.00    0.30  0.00  0.0   1.0   1.0  0.66
## Orange         10 111   0.12  0.32   0.00    0.02  0.00  0.0   1.0   1.0  2.35
## E_I            11 111   0.37  0.48   0.00    0.34  0.00  0.0   1.0   1.0  0.53
## S_N            12 111   0.64  0.48   1.00    0.67  0.00  0.0   1.0   1.0 -0.57
## T_F            13 111   0.50  0.50   1.00    0.51  0.00  0.0   1.0   1.0 -0.02
## J_P            14 111   0.18  0.39   0.00    0.10  0.00  0.0   1.0   1.0  1.64
## GPA            15 111   3.61  0.26   3.64    3.62  0.33  3.0   4.0   1.0 -0.31
## ST             16 111   0.23  0.43   0.00    0.17  0.00  0.0   1.0   1.0  1.24
## SF             17 111   0.13  0.33   0.00    0.03  0.00  0.0   1.0   1.0  2.22
## NF             18 111   0.38  0.49   0.00    0.35  0.00  0.0   1.0   1.0  0.49
## NT             19 111   0.26  0.44   0.00    0.20  0.00  0.0   1.0   1.0  1.07
## Internal*      20 111   2.23  1.18   2.00    2.17  1.48  1.0   4.0   3.0  0.39
## NumMatches*    21 111   2.20  1.37   2.00    2.00  1.48  1.0   5.0   4.0  1.10
## Chair_Match*   22 111   1.21  0.41   1.00    1.13  0.00  1.0   2.0   1.0  1.43
##              kurtosis   se
## Diversity*      -1.81 0.05
## Gender*         -1.40 0.04
## Exam_1           7.04 0.63
## Exam_2           0.65 1.01
## Total_Score      3.28 1.41
## True_Color*     -0.77 0.13
## Green            1.63 0.03
## Blue            -1.40 0.04
## Gold            -1.58 0.05
## Orange           3.55 0.03
## E_I             -1.73 0.05
## S_N             -1.69 0.05
## T_F             -2.02 0.05
## J_P              0.70 0.04
## GPA             -1.08 0.03
## ST              -0.47 0.04
## SF               2.96 0.03
## NF              -1.77 0.05
## NT              -0.86 0.04
## Internal*       -1.37 0.11
## NumMatches*     -0.04 0.13
## Chair_Match*     0.03 0.04
# Correlation between the total exam score and GPA (cor() default method).
with(compdata, cor(Total_Score, GPA))
## [1] 0.4596028

Imputation

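The "Unknown" level of Chair Match is perfectly collinear with the "Unknown" level of Number of Matches, so including both makes the model matrix singular. We therefore code Chair Match as a binary indicator: Chair Match = 1 versus Chair Match ≠ 1.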

Plots

Total Score

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Histogram of total comprehensive-exam scores in bins 7 points wide.
# A stray `col=Gender` argument used to be passed to ggplot() outside aes();
# it was silently ignored, so it has been removed. The bars keep their fixed
# red fill and black outline.
ggplot(compdata, aes(x = Total_Score)) +
  geom_histogram(binwidth = 7, colour = "black", fill = "red") +
  ggtitle("Comprehensive Examination Score") +
  ylab("Frequency") +
  xlab("Score")

Color

# Bar chart of how many students fall in each True Color category.
# One fill colour is supplied per bar, in the order the table lists them.
barplot(
  table(compdata[["True_Color"]]),
  col = c("blue", "gold", "green", "orange", "gray"),
  main = "True Color",
  ylab = "Frequency"
)

Score by True Color

# Horizontal box plots of total score, one box per True Color group.
# The horizontal axis carries the score values, so it is labelled
# "Total Score" (it was previously mislabelled "Frequency").
boxplot(
  Total_Score ~ True_Color,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  col = c("blue", "gold", "green", "orange", "gray"),
  main = "True Color Differences",
  xlab = "Total Score",
  ylab = ""
)

Score by Gender

# Horizontal notched box plots of total score, one box per Gender level.
# The horizontal axis carries the score values, so it is labelled
# "Total Score" (it was previously mislabelled "Frequency").
boxplot(
  Total_Score ~ Gender,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Gender Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Underrepresented

# Horizontal notched box plots of total score, one box per Diversity level.
# The horizontal axis carries the score values, so it is labelled
# "Total Score" (it was previously mislabelled "Frequency").
boxplot(
  Total_Score ~ Diversity,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Scores by Diversity",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ True Color

# Horizontal box plots of total score, one box per True Color group, drawn
# with darker green and orange fills.
# The horizontal axis carries the score values, so it is labelled
# "Total Score" (it was previously mislabelled "Frequency").
boxplot(
  Total_Score ~ True_Color,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  col = c("blue", "gold", "dark green", "dark orange", "gray"),
  main = "True Color Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Function Pair Differences

# Horizontal notched box plots of total score, one box per level of Internal
# (the function-pair grouping).
# The horizontal axis carries the score values, so it is labelled
# "Total Score" (it was previously mislabelled "Frequency").
boxplot(
  Total_Score ~ Internal,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  main = "Function Pair Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Num Matches

# Horizontal box plots of the Exam_2 score, one box per NumMatches category.
# The horizontal axis carries the Exam_2 values, so it is labelled
# "Exam 2 Score" (it was previously mislabelled "frequency"); the vertical
# axis lists the match categories.
boxplot(
  Exam_2 ~ as.factor(NumMatches),
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  main = "True Color Student # of Matches with Committee Members",
  xlab = "Exam 2 Score",
  ylab = "Number of Matches"
)

Scores ~ Chair Match

# Horizontal notched box plots of the Exam_2 score, one box per Chair_Match
# category.
# The horizontal axis carries the Exam_2 values, so it is labelled
# "Exam 2 Score" (it was previously mislabelled "frequency"); the vertical
# axis lists the chair-match categories.
boxplot(
  Exam_2 ~ as.factor(Chair_Match),
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  main = "True Color Student vs. Chair Match",
  xlab = "Exam 2 Score",
  ylab = "Chair Match"
)

Inferentials

Linear Model, Total Score

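NOTE: Number of matches was to be removed until we have complete data; however, the model below still includes NumMatches, with incomplete records coded as the level "Unknown".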

# Ordinary least-squares model of the total score on gender, diversity, GPA,
# True Color, function pair (Internal), chair match and number of matches.
# The formula is kept inline so that the "Call:" echoed by summary() is
# unchanged.
mylm <- lm(
  Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
## 
## Call:
## lm(formula = Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Internal + Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.840  -5.432   0.000   7.182  30.626 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   76.1154    19.8520   3.834 0.000226 ***
## GenderMale                     0.3084     2.7968   0.110 0.912419    
## DiversityUnderrepresented     -3.6405     2.6516  -1.373 0.173003    
## GPA                           23.7437     5.3238   4.460 2.25e-05 ***
## as.factor(True_Color)Gold     -6.8043     3.4527  -1.971 0.051667 .  
## as.factor(True_Color)Green    -5.7287     4.2319  -1.354 0.179048    
## as.factor(True_Color)Orange   -8.1258     4.3371  -1.874 0.064063 .  
## as.factor(True_Color)Unknown -10.0701     4.4919  -2.242 0.027298 *  
## InternalNT                    -0.3392     3.3326  -0.102 0.919146    
## InternalSF                   -13.2188     3.9451  -3.351 0.001158 ** 
## InternalST                     1.9328     3.5732   0.541 0.589838    
## Chair_MatchY                  -1.7438     3.9676  -0.440 0.661295    
## NumMatches1                    2.1310     3.2643   0.653 0.515454    
## NumMatches2                    6.3666     5.4264   1.173 0.243625    
## NumMatches3                   21.3118    13.7340   1.552 0.124045    
## NumMatchesUnknown              4.3330     3.8970   1.112 0.268998    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.5 on 95 degrees of freedom
## Multiple R-squared:  0.3901, Adjusted R-squared:  0.2938 
## F-statistic:  4.05 on 15 and 95 DF,  p-value: 1.202e-05

Linear Model 2, Part 1 Score

# Ordinary least-squares model of the Exam_1 score on the same predictors as
# the total-score model. Note that this overwrites the previous `mylm` object.
mylm <- lm(
  Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
## 
## Call:
## lm(formula = Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Internal + Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -29.6496  -2.4840   0.5882   4.3385   9.9204 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   52.0552    10.1167   5.145 1.43e-06 ***
## GenderMale                     0.5951     1.4253   0.418  0.67722    
## DiversityUnderrepresented     -0.6182     1.3513  -0.457  0.64838    
## GPA                            8.1844     2.7130   3.017  0.00328 ** 
## as.factor(True_Color)Gold     -3.7935     1.7595  -2.156  0.03361 *  
## as.factor(True_Color)Green    -1.6418     2.1566  -0.761  0.44838    
## as.factor(True_Color)Orange   -4.3535     2.2102  -1.970  0.05178 .  
## as.factor(True_Color)Unknown  -4.2827     2.2891  -1.871  0.06444 .  
## InternalNT                     0.3881     1.6983   0.228  0.81975    
## InternalSF                    -3.0015     2.0104  -1.493  0.13876    
## InternalST                     0.2166     1.8209   0.119  0.90555    
## Chair_MatchY                  -1.1643     2.0219  -0.576  0.56609    
## NumMatches1                   -0.4472     1.6635  -0.269  0.78862    
## NumMatches2                    0.8663     2.7653   0.313  0.75476    
## NumMatches3                   13.2136     6.9989   1.888  0.06208 .  
## NumMatchesUnknown              1.9550     1.9859   0.984  0.32741    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.371 on 95 degrees of freedom
## Multiple R-squared:  0.2142, Adjusted R-squared:  0.09014 
## F-statistic: 1.727 on 15 and 95 DF,  p-value: 0.05837

Linear Model 2, Score 2

# Ordinary least-squares model of the Exam_2 score on gender, diversity, GPA,
# True Color, chair match and number of matches.
# NOTE(review): unlike the two earlier models, `Internal` is not among the
# predictors here -- confirm this omission is intentional.
# This overwrites the previous `mylm` object.
mylm <- lm(
  Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
## 
## Call:
## lm(formula = Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.286  -3.850   0.172   6.032  16.469 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  18.94257   14.60172   1.297   0.1976    
## GenderMale                    1.07191    2.08674   0.514   0.6086    
## DiversityUnderrepresented    -3.39127    1.99644  -1.699   0.0926 .  
## GPA                          16.21149    3.97092   4.083  9.1e-05 ***
## as.factor(True_Color)Gold    -0.51056    2.38343  -0.214   0.8308    
## as.factor(True_Color)Green   -1.50754    3.09753  -0.487   0.6276    
## as.factor(True_Color)Orange  -2.57731    3.23211  -0.797   0.4271    
## as.factor(True_Color)Unknown -5.26765    3.40092  -1.549   0.1246    
## Chair_MatchY                  0.01827    2.90094   0.006   0.9950    
## NumMatches1                   2.30239    2.43539   0.945   0.3468    
## NumMatches2                   5.06248    4.04535   1.251   0.2138    
## NumMatches3                   7.21617   10.13544   0.712   0.4782    
## NumMatchesUnknown             2.48395    2.94982   0.842   0.4018    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.506 on 98 degrees of freedom
## Multiple R-squared:  0.292,  Adjusted R-squared:  0.2053 
## F-statistic: 3.368 on 12 and 98 DF,  p-value: 0.0003907