Load Data

library(psych)

## Warning: package 'psych' was built under R version 4.0.5

# Read the study data. The full path is built with file.path() rather than
# calling setwd(), so loading the file does not change the session's working
# directory. Character columns are read in as factors.
compdata <- read.csv(
  file.path("D:/desktopbackup/kim", "kim study.csv"),
  stringsAsFactors = TRUE
)

# Force Gender to a factor and relabel its levels, in their existing order.
# NOTE(review): assumes the first level in the raw file denotes Female and the
# second Male -- confirm against the codebook.
compdata$Gender <- as.factor(compdata$Gender)
levels(compdata$Gender) <- c("Female", "Male")

Describe Data

# Descriptive statistics (psych::describe) for every column except the first.
# Negative list-style indexing is used instead of 2:length(compdata): that
# sequence counts backwards (2, 1) on a one-column data frame, whereas
# compdata[-1] always yields a data frame of the remaining columns.
describe(compdata[-1])

##              vars  n   mean    sd median trimmed   mad  min   max range  skew
## Diversity*      1 76   1.66  0.48   2.00    1.69  0.00  1.0   2.0   1.0 -0.65
## Gender*         2 76   1.38  0.49   1.00    1.35  0.00  1.0   2.0   1.0  0.48
## Exam_1          3 76  78.47  7.27  79.60   79.43  5.19 45.3  90.0  44.7 -2.22
## Exam_2          4 76  75.44 10.88  76.55   75.96  9.34 45.3  95.6  50.3 -0.57
## Total_Score     5 76 153.90 15.68 155.85  155.56 13.64 90.6 178.2  87.6 -1.51
## True_Color*     6 76   2.58  1.44   2.00    2.48  1.48  1.0   5.0   4.0  0.51
## Green           7 76   0.16  0.37   0.00    0.08  0.00  0.0   1.0   1.0  1.84
## Blue            8 76   0.34  0.48   0.00    0.31  0.00  0.0   1.0   1.0  0.65
## Gold            9 76   0.29  0.46   0.00    0.24  0.00  0.0   1.0   1.0  0.91
## Orange         10 76   0.11  0.31   0.00    0.02  0.00  0.0   1.0   1.0  2.52
## E_I            11 76   0.32  0.47   0.00    0.27  0.00  0.0   1.0   1.0  0.78
## S_N            12 76   0.67  0.47   1.00    0.71  0.00  0.0   1.0   1.0 -0.71
## T_F            13 76   0.53  0.50   1.00    0.53  0.00  0.0   1.0   1.0 -0.10
## J_P            14 76   0.21  0.41   0.00    0.15  0.00  0.0   1.0   1.0  1.39
## GPA            15 76   3.56  0.26   3.60    3.56  0.33  3.0   4.0   1.0 -0.14
## ST             16 76   0.20  0.40   0.00    0.13  0.00  0.0   1.0   1.0  1.49
## SF             17 76   0.14  0.35   0.00    0.06  0.00  0.0   1.0   1.0  1.98
## NF             18 76   0.38  0.49   0.00    0.35  0.00  0.0   1.0   1.0  0.48
## NT             19 76   0.28  0.45   0.00    0.23  0.00  0.0   1.0   1.0  0.98
## Internal*      20 76   2.16  1.14   2.00    2.08  1.48  1.0   4.0   3.0  0.49
## NumMatches*    21 76   2.18  1.15   2.00    2.11  1.48  1.0   4.0   3.0  0.52
## Chair_Match*   22 76   1.21  0.41   1.00    1.15  0.00  1.0   2.0   1.0  1.39
##              kurtosis   se
## Diversity*      -1.59 0.05
## Gender*         -1.79 0.06
## Exam_1           6.82 0.83
## Exam_2           0.13 1.25
## Total_Score      3.49 1.80
## True_Color*     -1.13 0.17
## Green            1.40 0.04
## Blue            -1.59 0.05
## Gold            -1.19 0.05
## Orange           4.42 0.04
## E_I             -1.41 0.05
## S_N             -1.51 0.05
## T_F             -2.02 0.06
## J_P             -0.06 0.05
## GPA             -1.15 0.03
## ST               0.23 0.05
## SF               1.95 0.04
## NF              -1.79 0.06
## NT              -1.05 0.05
## Internal*       -1.23 0.13
## NumMatches*     -1.20 0.13
## Chair_Match*    -0.06 0.05

# Correlation between the total exam score and GPA (cor() default method).
with(compdata, cor(Total_Score, GPA))

## [1] 0.4496644

Imputation

The "Unknown" level of Chair Match is perfectly collinear with the "Unknown" level of Number of Matches (including both makes the design matrix singular). So we use Chair Match \(= 1\) versus Chair Match \(\ne 1\).

Plots

Total Score

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.5

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

# Histogram of the total score, drawn with base graphics.
hist(
  compdata[["Total_Score"]],
  main = "Comprehensive Examination Score",
  xlab = "Total Score",
  col = "red"
)

## Score by Gender

# Notched, horizontal box plots of the total score for each gender.
# With horizontal = TRUE the x-axis carries the score itself, so it is labelled
# "Total Score" rather than "Frequency".
boxplot(
  Total_Score ~ Gender,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Gender Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Underrepresented

# Notched, horizontal box plots of the total score for each Diversity group.
# With horizontal = TRUE the x-axis carries the score itself, so it is labelled
# "Total Score" rather than "Frequency".
boxplot(
  Total_Score ~ Diversity,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Scores by Diversity",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ True Color

# Horizontal box plots of the total score for each True Color category.
# One fill colour is supplied per factor level, in level order.
# With horizontal = TRUE the x-axis carries the score itself, so it is labelled
# "Total Score" rather than "Frequency".
boxplot(
  Total_Score ~ True_Color,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  col = c("blue", "gold", "dark green", "dark orange", "gray"),
  main = "True Color Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Function Pair Differences

# Horizontal box plots of the total score for each function pair (Internal).
# With horizontal = TRUE the x-axis carries the score itself, so it is labelled
# "Total Score" rather than "Frequency".
boxplot(
  Total_Score ~ Internal,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  main = "Function Pair Differences",
  xlab = "Total Score",
  ylab = ""
)

Scores ~ Num Matches

# Horizontal box plots of the Exam_2 score for each number-of-matches group.
# With horizontal = TRUE the x-axis carries the exam score, so it is labelled
# with the score rather than "frequency"; the y-axis lists the groups.
# NOTE(review): this plot uses Exam_2 while the other box plots use
# Total_Score -- confirm that is intended.
boxplot(
  Exam_2 ~ as.factor(NumMatches),
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  main = "True Color Student # of Matches with Committee Members",
  xlab = "Exam 2 Score",
  ylab = "Number of Matches"
)

Inferentials

Linear Model, Total Score

NOTE: Number of matches was to be removed until we have complete data, but the model below still includes NumMatches (with "Unknown" as its own level).

# Linear model of the total score on gender, diversity, GPA, True Color,
# function pair (Internal), chair match and number of matches.
# The formula is kept inline so the Call printed by summary() shows it in full.
mylm <- lm(
  Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)

## 
## Call:
## lm(formula = Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Internal + Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.222  -5.179   1.391   8.082  28.768 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   68.7457    26.0910   2.635 0.010656 *  
## GenderMale                    -0.1618     3.6410  -0.044 0.964707    
## DiversityUnderrepresented     -3.9079     3.6551  -1.069 0.289213    
## GPA                           26.7802     7.0067   3.822 0.000313 ***
## as.factor(True_Color)Gold     -9.0759     4.7144  -1.925 0.058874 .  
## as.factor(True_Color)Green    -6.8188     5.4250  -1.257 0.213571    
## as.factor(True_Color)Orange   -9.9830     6.0522  -1.649 0.104192    
## as.factor(True_Color)Unknown -12.1672     5.2244  -2.329 0.023194 *  
## InternalNT                    -1.9917     4.4211  -0.450 0.653952    
## InternalSF                   -12.5543     5.2075  -2.411 0.018946 *  
## InternalST                     2.2839     5.2568   0.434 0.665484    
## Chair_MatchY                  -1.9578     5.5278  -0.354 0.724424    
## NumMatches1                    0.1290     4.5857   0.028 0.977650    
## NumMatches2                    8.6428     7.4885   1.154 0.252940    
## NumMatchesUnknown              3.5263     4.5730   0.771 0.443607    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.79 on 61 degrees of freedom
## Multiple R-squared:  0.3705, Adjusted R-squared:  0.226 
## F-statistic: 2.564 on 14 and 61 DF,  p-value: 0.005829

Linear Model 2, Part 1 Score

# Linear model of the Exam_1 score on gender, diversity, GPA, True Color,
# function pair (Internal), chair match and number of matches.
# Reuses the name mylm, replacing any model previously stored under it.
mylm <- lm(
  Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)

## 
## Call:
## lm(formula = Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Internal + Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.349  -3.191   1.514   3.977  10.565 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   51.1372    13.5828   3.765 0.000377 ***
## GenderMale                     0.5055     1.8955   0.267 0.790588    
## DiversityUnderrepresented     -1.2569     1.9028  -0.661 0.511376    
## GPA                            8.8716     3.6476   2.432 0.017959 *  
## as.factor(True_Color)Gold     -5.2520     2.4543  -2.140 0.036367 *  
## as.factor(True_Color)Green    -2.6985     2.8242  -0.955 0.343110    
## as.factor(True_Color)Orange   -5.2603     3.1508  -1.670 0.100133    
## as.factor(True_Color)Unknown  -5.0810     2.7198  -1.868 0.066547 .  
## InternalNT                     1.0715     2.3016   0.466 0.643212    
## InternalSF                    -4.1953     2.7110  -1.548 0.126914    
## InternalST                     0.6574     2.7367   0.240 0.810974    
## Chair_MatchY                  -0.5338     2.8777  -0.185 0.853466    
## NumMatches1                   -0.8491     2.3873  -0.356 0.723324    
## NumMatches2                   -0.4675     3.8985  -0.120 0.904935    
## NumMatchesUnknown              1.6189     2.3807   0.680 0.499051    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.182 on 61 degrees of freedom
## Multiple R-squared:  0.2067, Adjusted R-squared:  0.02464 
## F-statistic: 1.135 on 14 and 61 DF,  p-value: 0.3474

Linear Model 2, Score 2

# Linear model of the Exam_2 score on gender, diversity, GPA, True Color,
# chair match and number of matches.
# NOTE(review): the Internal term is not part of this formula -- confirm the
# omission is intentional.
# Reuses the name mylm, replacing any model previously stored under it.
mylm <- lm(
  Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)

## 
## Call:
## lm(formula = Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) + 
##     Chair_Match + NumMatches, data = compdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.123  -5.546   1.099   6.490  15.150 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   10.8581    17.8063   0.610 0.544163    
## GenderMale                     0.4207     2.4853   0.169 0.866108    
## DiversityUnderrepresented     -2.5074     2.5495  -0.983 0.329071    
## GPA                           18.8877     4.8148   3.923 0.000216 ***
## as.factor(True_Color)Gold     -1.5303     3.0345  -0.504 0.615785    
## as.factor(True_Color)Green    -2.5623     3.6935  -0.694 0.490373    
## as.factor(True_Color)Orange   -2.7537     4.1306  -0.667 0.507395    
## as.factor(True_Color)Unknown  -6.9330     3.6660  -1.891 0.063130 .  
## Chair_MatchY                  -0.4130     3.6062  -0.115 0.909172    
## NumMatches1                    0.3534     3.2091   0.110 0.912650    
## NumMatches2                    8.4115     5.0617   1.662 0.101441    
## NumMatchesUnknown              1.7186     3.1652   0.543 0.589048    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.712 on 64 degrees of freedom
## Multiple R-squared:   0.32,  Adjusted R-squared:  0.2031 
## F-statistic: 2.737 on 11 and 64 DF,  p-value: 0.005795

Kim

Sith

2021

Load Data

Describe Data

Imputation

Plots

Total Score

Scores ~ Underrepresented

Scores ~ True Color

Scores ~ Function Pair Differences

Scores ~ Num Matches

Inferentials

Linear Model, Total Score

Linear Model 2, Part 1 Score

Linear Model 2, Score 2