library(psych)
## Warning: package 'psych' was built under R version 4.0.5
# Read the study data from the project folder; text columns become factors.
setwd("D:/desktopbackup/kim")
compdata <- read.csv(file = "kim study.csv", stringsAsFactors = TRUE)

# Treat Gender as categorical and give its two levels readable labels.
# NOTE(review): the labels are assigned by position in the existing level
# order -- confirm that the first level really corresponds to "Female".
compdata[["Gender"]] <- as.factor(compdata[["Gender"]])
levels(compdata[["Gender"]]) <- c("Female", "Male")

# Descriptive statistics for every column except the first one.
describe(compdata[, seq(2, ncol(compdata))])
## vars n mean sd median trimmed mad min max range skew
## Diversity* 1 76 1.66 0.48 2.00 1.69 0.00 1.0 2.0 1.0 -0.65
## Gender* 2 76 1.38 0.49 1.00 1.35 0.00 1.0 2.0 1.0 0.48
## Exam_1 3 76 78.47 7.27 79.60 79.43 5.19 45.3 90.0 44.7 -2.22
## Exam_2 4 76 75.44 10.88 76.55 75.96 9.34 45.3 95.6 50.3 -0.57
## Total_Score 5 76 153.90 15.68 155.85 155.56 13.64 90.6 178.2 87.6 -1.51
## True_Color* 6 76 2.58 1.44 2.00 2.48 1.48 1.0 5.0 4.0 0.51
## Green 7 76 0.16 0.37 0.00 0.08 0.00 0.0 1.0 1.0 1.84
## Blue 8 76 0.34 0.48 0.00 0.31 0.00 0.0 1.0 1.0 0.65
## Gold 9 76 0.29 0.46 0.00 0.24 0.00 0.0 1.0 1.0 0.91
## Orange 10 76 0.11 0.31 0.00 0.02 0.00 0.0 1.0 1.0 2.52
## E_I 11 76 0.32 0.47 0.00 0.27 0.00 0.0 1.0 1.0 0.78
## S_N 12 76 0.67 0.47 1.00 0.71 0.00 0.0 1.0 1.0 -0.71
## T_F 13 76 0.53 0.50 1.00 0.53 0.00 0.0 1.0 1.0 -0.10
## J_P 14 76 0.21 0.41 0.00 0.15 0.00 0.0 1.0 1.0 1.39
## GPA 15 76 3.56 0.26 3.60 3.56 0.33 3.0 4.0 1.0 -0.14
## ST 16 76 0.20 0.40 0.00 0.13 0.00 0.0 1.0 1.0 1.49
## SF 17 76 0.14 0.35 0.00 0.06 0.00 0.0 1.0 1.0 1.98
## NF 18 76 0.38 0.49 0.00 0.35 0.00 0.0 1.0 1.0 0.48
## NT 19 76 0.28 0.45 0.00 0.23 0.00 0.0 1.0 1.0 0.98
## Internal* 20 76 2.16 1.14 2.00 2.08 1.48 1.0 4.0 3.0 0.49
## NumMatches* 21 76 2.18 1.15 2.00 2.11 1.48 1.0 4.0 3.0 0.52
## Chair_Match* 22 76 1.21 0.41 1.00 1.15 0.00 1.0 2.0 1.0 1.39
## kurtosis se
## Diversity* -1.59 0.05
## Gender* -1.79 0.06
## Exam_1 6.82 0.83
## Exam_2 0.13 1.25
## Total_Score 3.49 1.80
## True_Color* -1.13 0.17
## Green 1.40 0.04
## Blue -1.59 0.05
## Gold -1.19 0.05
## Orange 4.42 0.04
## E_I -1.41 0.05
## S_N -1.51 0.05
## T_F -2.02 0.06
## J_P -0.06 0.05
## GPA -1.15 0.03
## ST 0.23 0.05
## SF 1.95 0.04
## NF -1.79 0.06
## NT -1.05 0.05
## Internal* -1.23 0.13
## NumMatches* -1.20 0.13
## Chair_Match* -0.06 0.05
# Pearson correlation between the total exam score and GPA.
with(compdata, cor(Total_Score, GPA))
## [1] 0.4496644
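The "Unknown" level of Chair Match is perfectly collinear with the "Unknown" level of Number of Matches, which makes the model singular. We therefore code Chair Match as a two-level indicator: Chair Match \(= 1\) versus Chair Match \(\ne 1\).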
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Distribution of the combined comprehensive-examination score.
hist(
  x = compdata[["Total_Score"]],
  main = "Comprehensive Examination Score",
  xlab = "Total Score",
  col = "red"
)
## Score by Gender
# Horizontal boxplots of exam scores by student characteristic.
#
# Because every plot uses horizontal = TRUE, `xlab` labels the axis that
# carries the score values and `ylab` labels the grouping axis. The score axis
# was previously labelled "Frequency", which a boxplot does not display; it is
# now labelled with the score actually plotted.

# Total score by gender.
boxplot(
  Total_Score ~ Gender,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Gender Differences",
  xlab = "Total Score",
  ylab = ""
)

# Total score by diversity group.
boxplot(
  Total_Score ~ Diversity,
  data = compdata,
  notch = TRUE,
  horizontal = TRUE,
  col = c("red", "blue"),
  main = "Scores by Diversity",
  xlab = "Total Score",
  ylab = ""
)

# Total score by True Color; one fill colour per True_Color level.
boxplot(
  Total_Score ~ True_Color,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  col = c("blue", "gold", "dark green", "dark orange", "gray"),
  main = "True Color Differences",
  xlab = "Total Score",
  ylab = ""
)

# Total score by function pair (the Internal column).
boxplot(
  Total_Score ~ Internal,
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  main = "Function Pair Differences",
  xlab = "Total Score",
  ylab = ""
)

# Exam 2 score by the number of True Color matches with committee members.
# Note that this plot uses Exam_2, not Total_Score.
boxplot(
  Exam_2 ~ as.factor(NumMatches),
  data = compdata,
  notch = FALSE,
  horizontal = TRUE,
  main = "True Color Student # of Matches with Committee Members",
  xlab = "Exam 2 Score",
  ylab = "Number of Matches"
)
NOTE: The number of matches (NumMatches) has been removed until we have complete data.
# Linear model of the total score on gender, diversity, GPA, True Color,
# function pair (Internal), chair match, and number of matches.
mylm <- lm(
  Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Total_Score ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Internal + Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.222 -5.179 1.391 8.082 28.768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.7457 26.0910 2.635 0.010656 *
## GenderMale -0.1618 3.6410 -0.044 0.964707
## DiversityUnderrepresented -3.9079 3.6551 -1.069 0.289213
## GPA 26.7802 7.0067 3.822 0.000313 ***
## as.factor(True_Color)Gold -9.0759 4.7144 -1.925 0.058874 .
## as.factor(True_Color)Green -6.8188 5.4250 -1.257 0.213571
## as.factor(True_Color)Orange -9.9830 6.0522 -1.649 0.104192
## as.factor(True_Color)Unknown -12.1672 5.2244 -2.329 0.023194 *
## InternalNT -1.9917 4.4211 -0.450 0.653952
## InternalSF -12.5543 5.2075 -2.411 0.018946 *
## InternalST 2.2839 5.2568 0.434 0.665484
## Chair_MatchY -1.9578 5.5278 -0.354 0.724424
## NumMatches1 0.1290 4.5857 0.028 0.977650
## NumMatches2 8.6428 7.4885 1.154 0.252940
## NumMatchesUnknown 3.5263 4.5730 0.771 0.443607
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.79 on 61 degrees of freedom
## Multiple R-squared: 0.3705, Adjusted R-squared: 0.226
## F-statistic: 2.564 on 14 and 61 DF, p-value: 0.005829
# Same predictors as the total-score model, with the Exam 1 score as the
# response. Overwrites the previous `mylm` fit.
mylm <- lm(
  Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Internal + Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Exam_1 ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Internal + Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.349 -3.191 1.514 3.977 10.565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.1372 13.5828 3.765 0.000377 ***
## GenderMale 0.5055 1.8955 0.267 0.790588
## DiversityUnderrepresented -1.2569 1.9028 -0.661 0.511376
## GPA 8.8716 3.6476 2.432 0.017959 *
## as.factor(True_Color)Gold -5.2520 2.4543 -2.140 0.036367 *
## as.factor(True_Color)Green -2.6985 2.8242 -0.955 0.343110
## as.factor(True_Color)Orange -5.2603 3.1508 -1.670 0.100133
## as.factor(True_Color)Unknown -5.0810 2.7198 -1.868 0.066547 .
## InternalNT 1.0715 2.3016 0.466 0.643212
## InternalSF -4.1953 2.7110 -1.548 0.126914
## InternalST 0.6574 2.7367 0.240 0.810974
## Chair_MatchY -0.5338 2.8777 -0.185 0.853466
## NumMatches1 -0.8491 2.3873 -0.356 0.723324
## NumMatches2 -0.4675 3.8985 -0.120 0.904935
## NumMatchesUnknown 1.6189 2.3807 0.680 0.499051
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.182 on 61 degrees of freedom
## Multiple R-squared: 0.2067, Adjusted R-squared: 0.02464
## F-statistic: 1.135 on 14 and 61 DF, p-value: 0.3474
# Linear model of the Exam 2 score. Unlike the two models above, the
# function-pair term (Internal) is not included here.
# Overwrites the previous `mylm` fit.
mylm <- lm(
  Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
    Chair_Match + NumMatches,
  data = compdata
)
summary(mylm)
##
## Call:
## lm(formula = Exam_2 ~ Gender + Diversity + GPA + as.factor(True_Color) +
## Chair_Match + NumMatches, data = compdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.123 -5.546 1.099 6.490 15.150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.8581 17.8063 0.610 0.544163
## GenderMale 0.4207 2.4853 0.169 0.866108
## DiversityUnderrepresented -2.5074 2.5495 -0.983 0.329071
## GPA 18.8877 4.8148 3.923 0.000216 ***
## as.factor(True_Color)Gold -1.5303 3.0345 -0.504 0.615785
## as.factor(True_Color)Green -2.5623 3.6935 -0.694 0.490373
## as.factor(True_Color)Orange -2.7537 4.1306 -0.667 0.507395
## as.factor(True_Color)Unknown -6.9330 3.6660 -1.891 0.063130 .
## Chair_MatchY -0.4130 3.6062 -0.115 0.909172
## NumMatches1 0.3534 3.2091 0.110 0.912650
## NumMatches2 8.4115 5.0617 1.662 0.101441
## NumMatchesUnknown 1.7186 3.1652 0.543 0.589048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.712 on 64 degrees of freedom
## Multiple R-squared: 0.32, Adjusted R-squared: 0.2031
## F-statistic: 2.737 on 11 and 64 DF, p-value: 0.005795