library(psych)
## Warning: package 'psych' was built under R version 4.0.5
# Load the study data and print descriptive statistics for it.
# NOTE(review): setwd() with a machine-specific path makes this script
# non-portable; it is kept here so any relative paths used later still resolve.
setwd("D:/kim graphs")
compdata <- read.csv("kim study.csv", stringsAsFactors = TRUE)
# Gender is coerced to a factor and its two levels are relabelled by position.
# NOTE(review): assumes the first level in sort order means Female - confirm
# against the raw coding in the CSV.
compdata$Gender <- as.factor(compdata$Gender)
levels(compdata$Gender) <- c("Female", "Male")
# Describe every column except the first. Negative indexing replaces
# 2:length(compdata), which counts backwards (2:1) on a one-column frame.
describe(compdata[-1])
## vars n mean sd median trimmed mad min max range skew
## Diversity* 1 111 1.61 0.49 2.00 1.64 0.00 1.0 2.0 1.0 -0.46
## Gender* 2 111 1.32 0.47 1.00 1.27 0.00 1.0 2.0 1.0 0.78
## Exam_1 3 111 78.69 6.68 79.20 79.29 5.49 45.3 92.0 46.7 -1.94
## Exam_2 4 111 76.15 10.66 76.90 76.97 9.19 45.3 95.6 50.3 -0.81
## Total_Score 5 111 154.84 14.88 156.00 156.24 12.75 90.6 178.2 87.6 -1.36
## True_Color* 6 111 2.43 1.33 2.00 2.29 1.48 1.0 5.0 4.0 0.67
## Green 7 111 0.15 0.36 0.00 0.07 0.00 0.0 1.0 1.0 1.90
## Blue 8 111 0.32 0.47 0.00 0.27 0.00 0.0 1.0 1.0 0.78
## Gold 9 111 0.34 0.48 0.00 0.30 0.00 0.0 1.0 1.0 0.66
## Orange 10 111 0.12 0.32 0.00 0.02 0.00 0.0 1.0 1.0 2.35
## E_I 11 111 0.37 0.48 0.00 0.34 0.00 0.0 1.0 1.0 0.53
## S_N 12 111 0.64 0.48 1.00 0.67 0.00 0.0 1.0 1.0 -0.57
## T_F 13 111 0.50 0.50 1.00 0.51 0.00 0.0 1.0 1.0 -0.02
## J_P 14 111 0.18 0.39 0.00 0.10 0.00 0.0 1.0 1.0 1.64
## GPA 15 111 3.61 0.26 3.64 3.62 0.33 3.0 4.0 1.0 -0.31
## ST 16 111 0.23 0.43 0.00 0.17 0.00 0.0 1.0 1.0 1.24
## SF 17 111 0.13 0.33 0.00 0.03 0.00 0.0 1.0 1.0 2.22
## NF 18 111 0.38 0.49 0.00 0.35 0.00 0.0 1.0 1.0 0.49
## NT 19 111 0.26 0.44 0.00 0.20 0.00 0.0 1.0 1.0 1.07
## Internal* 20 111 2.23 1.18 2.00 2.17 1.48 1.0 4.0 3.0 0.39
## NumMatches* 21 111 2.20 1.37 2.00 2.00 1.48 1.0 5.0 4.0 1.10
## Chair_Match* 22 111 1.21 0.41 1.00 1.13 0.00 1.0 2.0 1.0 1.43
## kurtosis se
## Diversity* -1.81 0.05
## Gender* -1.40 0.04
## Exam_1 7.04 0.63
## Exam_2 0.65 1.01
## Total_Score 3.28 1.41
## True_Color* -0.77 0.13
## Green 1.63 0.03
## Blue -1.40 0.04
## Gold -1.58 0.05
## Orange 3.55 0.03
## E_I -1.73 0.05
## S_N -1.69 0.05
## T_F -2.02 0.05
## J_P 0.70 0.04
## GPA -1.08 0.03
## ST -0.47 0.04
## SF 2.96 0.03
## NF -1.77 0.05
## NT -0.86 0.04
## Internal* -1.37 0.11
## NumMatches* -0.04 0.13
## Chair_Match* 0.03 0.04
# Pearson correlation (cor's default method) between total exam score and GPA.
with(compdata, cor(Total_Score, GPA))
## [1] 0.4596028
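The "Unknown" level of Chair Match is perfectly collinear with the "Unknown" level of Number of Matches (including both makes the design matrix singular), so we code Chair Match as a binary indicator: Chair Match == 1 versus Chair Match ≠ 1.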
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Histogram of Total_Score with a bin width of 7, black outlines, red fill.
# The former `col=Gender` argument to ggplot() was removed: it sat outside
# aes(), so it was never used as an aesthetic mapping and did not affect
# the plot (the bars have a fixed fill and outline colour anyway).
ggplot(compdata, aes(x = Total_Score)) +
  geom_histogram(binwidth = 7, col = 'black', fill = 'red') +
  ggtitle('Comprehensive Examination Score') +
  ylab('Frequency') +
  xlab('Score')
# Bar chart of the number of rows in each True_Color category.
# Bar colours are assigned to the categories by position.
barplot(
  table(compdata[["True_Color"]]),
  main = 'True Color',
  ylab = 'Frequency',
  col = c('blue', 'gold', 'green', 'orange', 'gray')
)
## Score by True Color
# Horizontal boxplots of Total_Score by grouping variable.
# With horizontal = TRUE the bottom (x) axis carries the score values, so it
# is labelled 'Score'. The previous 'Frequency' label was wrong: these plots
# show score distributions, not counts.

# Total_Score by True Color; box colours are assigned to groups by position.
boxplot(Total_Score ~ True_Color, data = compdata,
        notch = FALSE, horizontal = TRUE,
        col = c('blue', 'gold', 'green', 'orange', 'gray'),
        main = "True Color Differences", xlab = 'Score', ylab = '')
# Total_Score by Gender (notched).
boxplot(Total_Score ~ Gender, data = compdata,
        notch = TRUE, horizontal = TRUE,
        col = c("red", "blue"),
        main = "Gender Differences", xlab = 'Score', ylab = '')
# Total_Score by Diversity (notched).
boxplot(Total_Score ~ Diversity, data = compdata,
        notch = TRUE, horizontal = TRUE,
        col = c("red", "blue"),
        main = "Scores by Diversity", ylab = "", xlab = 'Score')
# NOTE(review): this repeats the True Color plot above with darker green and
# orange fills - confirm whether both versions are needed.
boxplot(Total_Score ~ True_Color, data = compdata,
        notch = FALSE, horizontal = TRUE,
        col = c('blue', 'gold', 'dark green', 'dark orange', 'gray'),
        main = "True Color Differences", ylab = '', xlab = "Score")
# Total_Score by Internal (notched, default box colours).
boxplot(Total_Score ~ Internal, data = compdata,
        notch = TRUE, horizontal = TRUE,
        main = "Function Pair Differences", xlab = 'Score', ylab = '')
# Horizontal boxplots of Exam_2 by the two committee-match variables.
# With horizontal = TRUE the bottom (x) axis carries the Exam_2 values, so it
# is labelled 'Exam 2 Score' rather than the previous, incorrect 'frequency'.
boxplot(Exam_2 ~ as.factor(NumMatches), data = compdata,
        notch = FALSE, horizontal = TRUE,
        main = "True Color Student # of Matches with Committee Members",
        xlab = 'Exam 2 Score', ylab = "Number of Matches")
## Scores ~ Chair Match
boxplot(Exam_2 ~ as.factor(Chair_Match), data = compdata,
        notch = TRUE, horizontal = TRUE,
        main = "True Color Student vs. Chair Match",
        xlab = 'Exam 2 Score', ylab = "Chair Match")
# Inferentials
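NOTE: Number of matches was to be removed until we have complete data; the models below still include NumMatches, with missing values kept as an "Unknown" level.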
# Ordinary least-squares fit of Total_Score on gender, diversity, GPA,
# True Color, Internal, chair match and number of matches; the fit is stored
# in `mylm` and its summary is printed.
mylm <- lm(
  formula = Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Internal + Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.840 -5.432 0.000 7.182 30.626
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.1154 19.8520 3.834 0.000226 ***
## GenderMale 0.3084 2.7968 0.110 0.912419
## DiversityUnderrepresented -3.6405 2.6516 -1.373 0.173003
## GPA 23.7437 5.3238 4.460 2.25e-05 ***
## as.factor(True_Color)Gold -6.8043 3.4527 -1.971 0.051667 .
## as.factor(True_Color)Green -5.7287 4.2319 -1.354 0.179048
## as.factor(True_Color)Orange -8.1258 4.3371 -1.874 0.064063 .
## as.factor(True_Color)Unknown -10.0701 4.4919 -2.242 0.027298 *
## InternalNT -0.3392 3.3326 -0.102 0.919146
## InternalSF -13.2188 3.9451 -3.351 0.001158 **
## InternalST 1.9328 3.5732 0.541 0.589838
## Chair_MatchY -1.7438 3.9676 -0.440 0.661295
## NumMatches1 2.1310 3.2643 0.653 0.515454
## NumMatches2 6.3666 5.4264 1.173 0.243625
## NumMatches3 21.3118 13.7340 1.552 0.124045
## NumMatchesUnknown 4.3330 3.8970 1.112 0.268998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.5 on 95 degrees of freedom
## Multiple R-squared: 0.3901, Adjusted R-squared: 0.2938
## F-statistic: 4.05 on 15 and 95 DF, p-value: 1.202e-05
# Ordinary least-squares fit of Exam_1 on gender, diversity, GPA, True Color,
# Internal, chair match and number of matches. Overwrites `mylm` and prints
# the summary of the new fit.
mylm <- lm(
  formula = Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Internal + Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.6496 -2.4840 0.5882 4.3385 9.9204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.0552 10.1167 5.145 1.43e-06 ***
## GenderMale 0.5951 1.4253 0.418 0.67722
## DiversityUnderrepresented -0.6182 1.3513 -0.457 0.64838
## GPA 8.1844 2.7130 3.017 0.00328 **
## as.factor(True_Color)Gold -3.7935 1.7595 -2.156 0.03361 *
## as.factor(True_Color)Green -1.6418 2.1566 -0.761 0.44838
## as.factor(True_Color)Orange -4.3535 2.2102 -1.970 0.05178 .
## as.factor(True_Color)Unknown -4.2827 2.2891 -1.871 0.06444 .
## InternalNT 0.3881 1.6983 0.228 0.81975
## InternalSF -3.0015 2.0104 -1.493 0.13876
## InternalST 0.2166 1.8209 0.119 0.90555
## Chair_MatchY -1.1643 2.0219 -0.576 0.56609
## NumMatches1 -0.4472 1.6635 -0.269 0.78862
## NumMatches2 0.8663 2.7653 0.313 0.75476
## NumMatches3 13.2136 6.9989 1.888 0.06208 .
## NumMatchesUnknown 1.9550 1.9859 0.984 0.32741
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.371 on 95 degrees of freedom
## Multiple R-squared: 0.2142, Adjusted R-squared: 0.09014
## F-statistic: 1.727 on 15 and 95 DF, p-value: 0.05837
# Ordinary least-squares fit of Exam_2 on gender, diversity, GPA, True Color,
# chair match and number of matches. Overwrites `mylm` and prints the summary.
# NOTE(review): unlike the Total_Score and Exam_1 models in this file, this
# formula has no Internal term - confirm the omission is intentional.
mylm <- lm(
  formula = Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.286 -3.850 0.172 6.032 16.469
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.94257 14.60172 1.297 0.1976
## GenderMale 1.07191 2.08674 0.514 0.6086
## DiversityUnderrepresented -3.39127 1.99644 -1.699 0.0926 .
## GPA 16.21149 3.97092 4.083 9.1e-05 ***
## as.factor(True_Color)Gold -0.51056 2.38343 -0.214 0.8308
## as.factor(True_Color)Green -1.50754 3.09753 -0.487 0.6276
## as.factor(True_Color)Orange -2.57731 3.23211 -0.797 0.4271
## as.factor(True_Color)Unknown -5.26765 3.40092 -1.549 0.1246
## Chair_MatchY 0.01827 2.90094 0.006 0.9950
## NumMatches1 2.30239 2.43539 0.945 0.3468
## NumMatches2 5.06248 4.04535 1.251 0.2138
## NumMatches3 7.21617 10.13544 0.712 0.4782
## NumMatchesUnknown 2.48395 2.94982 0.842 0.4018
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.506 on 98 degrees of freedom
## Multiple R-squared: 0.292, Adjusted R-squared: 0.2053
## F-statistic: 3.368 on 12 and 98 DF, p-value: 0.0003907