## Import the merged response data
## NOTE(review): readr reports parsing failures for DateResponseEntered
## (values such as "43:43.0"). That column is not among the variables kept for
## the analysis, but pass an explicit `col_types` if it is ever needed.
library(readr)
q1q2merged <- read_csv(
  file = "~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv"
)
## Warning: 772175 parsing failures.
## row col expected actual file
## 1 DateResponseEntered valid date 43:43.0 '~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv'
## 2 DateResponseEntered valid date 45:10.4 '~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv'
## 4 DateResponseEntered valid date 25:17.3 '~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv'
## 5 DateResponseEntered valid date 31:02.5 '~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv'
## 6 DateResponseEntered valid date 39:35.9 '~/Desktop/Current Work/[P5]Open-book Project/q1q2merged.csv'
## ... ................... .......... ....... .............................................................
## See problems(...) for more details.
# Preview the first six rows of the imported data
head(q1q2merged, n = 6)
## # A tibble: 6 x 25
## Code UIC NameDegree Gender CertYear Resident Defrocked Age
## <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 1745 U MD M 1970 No No 81
## 2 1745 U MD M 1970 No No 81
## 3 1745 U MD M 1970 No No 81
## 4 1745 U MD M 1970 No No 81
## 5 1745 U MD M 1970 No No 81
## 6 1745 U MD M 1970 No No 81
## # … with 17 more variables: DateStarted <time>, DateCompleted <time>,
## # ItemPoolNumber <dbl>, PhysicianQuestionOrder <dbl>, ItemIndex <dbl>,
## # AssistanceDesc <chr>, ConfidenceDesc <chr>, Comment <dbl>,
## # DisplayOrder <dbl>, AnswerChoice <chr>, Response <chr>,
## # FlagIsAnswerCorrect <dbl>, DateResponseEntered <time>,
## # TimeSpentInSeconds <dbl>, FirstCritiqueView <lgl>,
## # LastCritiqueView <lgl>, NumCritiqueViews <lgl>
## Keep only the columns needed for the open-book analysis
open_book_variables <- c(
  "Code", "NameDegree", "CertYear", "Age", "Gender",
  "ItemPoolNumber", "PhysicianQuestionOrder", "ItemIndex",
  "AssistanceDesc", "ConfidenceDesc", "DisplayOrder", "AnswerChoice",
  "FlagIsAnswerCorrect", "TimeSpentInSeconds"
)
open_book <- q1q2merged[, open_book_variables]
## Import the Rasch result file
pfile <- read_csv(
  file = "~/Desktop/Current Work/[P5]Open-book Project/pfile.csv"
)
## Keep only the Rasch columns used later (NAME is the join key)
pfile_variable <- c("NAME", "SCORE", "MEASURE", "IN.MSQ", "OUT.MSQ")
rasch_re <- pfile[, pfile_variable]
## Store the assistance description as a factor
open_book[["AssistanceDesc"]] <- as.factor(open_book[["AssistanceDesc"]])
## Tabulate how often each assistance level occurs
table(open_book[["AssistanceDesc"]])
##
## I answered it without assistance
## 385198
## I collaborated and looked up the answer
## 474
## I collaborated with a colleague
## 2470
## I looked up the answer
## 12958
## Put the assistance levels in a meaningful order (no assistance first)
open_book$AssistanceDesc <- factor(
  open_book$AssistanceDesc,
  levels = c(
    "I answered it without assistance",
    "I looked up the answer",
    "I collaborated with a colleague",
    "I collaborated and looked up the answer"
  )
)
## Derive the open-book variables from the assistance description
library(dplyr)
## OB_indicator: "No" for unassisted answers, "Yes" for any kind of assistance
open_book$OB_indicator <- recode(
  open_book$AssistanceDesc,
  "I answered it without assistance" = "No",
  "I looked up the answer" = "Yes",
  "I collaborated with a colleague" = "Yes",
  "I collaborated and looked up the answer" = "Yes"
)
## OB_degree: numeric code 1-4 following the level order set above
open_book$OB_degree <- recode(
  open_book$AssistanceDesc,
  "I answered it without assistance" = 1,
  "I looked up the answer" = 2,
  "I collaborated with a colleague" = 3,
  "I collaborated and looked up the answer" = 4
)
open_book$OB_indicator <- factor(
  open_book$OB_indicator,
  levels = c("No", "Yes")
)
## Check the confidence variable
table(open_book$ConfidenceDesc)
##
## Extremely Confident Moderately Confident Not at all Confident
## 25138 114044 46976
## Pretty Confident Slightly Confident Very Confident
## 70353 106017 38572
## Order the confidence levels from most to least confident
open_book$ConfidenceDesc <- factor(
  open_book$ConfidenceDesc,
  levels = c(
    "Extremely Confident",
    "Very Confident",
    "Pretty Confident",
    "Moderately Confident",
    "Slightly Confident",
    "Not at all Confident"
  )
)
## Numeric confidence score: 6 = "Extremely Confident" down to
## 1 = "Not at all Confident"
open_book$Confidence <- recode(
  open_book$ConfidenceDesc,
  "Extremely Confident" = 6,
  "Very Confident" = 5,
  "Pretty Confident" = 4,
  "Moderately Confident" = 3,
  "Slightly Confident" = 2,
  "Not at all Confident" = 1
)
## Certified = number of years between the certification year and 2019
open_book$Certified <- 2019 - open_book$CertYear
## Collapse the response-level rows to one row per physician (Code) and name
## the key column NAME so it matches the Rasch result file.
## NOTE(review): OB_indicator keeps only the FIRST response row of each
## physician, so a physician who used assistance on a later question is still
## labelled "No" - confirm this is the intended definition.
open_book_1 <- open_book %>%
  group_by(Code) %>%
  summarise(
    Certified = mean(Certified),
    Age = mean(Age),
    OB_degree = mean(OB_degree),
    Confidence = mean(Confidence),
    Gender = Gender[1],
    NameDegree = NameDegree[1],
    OB_indicator = OB_indicator[1]
  ) %>%
  rename(NAME = Code)
## Attach the person-level summaries to the Rasch results
open_book_rasch <- left_join(rasch_re, open_book_1, by = "NAME")
## Check the joined data
head(open_book_rasch, n = 6)
## # A tibble: 6 x 12
## NAME SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree Confidence
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1745 35 0.92 1.04 1.10 49 81 1 2.78
## 2 2097 33 0.72 1.06 1.13 49 84 1.02 2.34
## 3 4049 27 0.17 1.11 1.12 48 76 1 3.6
## 4 4233 34 0.82 0.867 0.824 48 74 1 2.28
## 5 5843 15 0.56 1.03 1.01 47 83 1 4.04
## 6 6668 28 0.26 1.15 1.18 47 79 1 3.36
## # … with 3 more variables: Gender <chr>, NameDegree <chr>,
## # OB_indicator <fct>
# Summary statistics for every column of the joined person-level data.
# Gender and NameDegree are character columns, which appears to be what
# triggers the coercion / min / max warnings in the output below.
library(psych)
describe(open_book_rasch)
## Warning in describe(open_book_rasch): NAs introduced by coercion
## Warning in describe(open_book_rasch): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## vars n mean sd median trimmed mad
## NAME 1 9690 103141.73 41667.22 106461.50 104834.65 51077.79
## SCORE 2 9690 29.48 9.93 32.00 29.72 11.86
## MEASURE 3 9690 1.05 0.66 1.02 1.02 0.59
## IN.MSQ 4 9690 1.00 0.09 1.00 1.00 0.09
## OUT.MSQ 5 9690 1.00 0.17 0.98 0.99 0.14
## Certified 6 9666 19.36 10.53 18.00 18.87 11.86
## Age 7 9690 49.20 10.52 49.00 49.04 11.86
## OB_degree 8 9690 1.05 0.13 1.00 1.02 0.00
## Confidence 9 9690 3.06 0.81 3.02 3.03 0.83
## Gender* 10 9690 NaN NA NA NaN NA
## NameDegree* 11 9690 NaN NA NA NaN NA
## OB_indicator* 12 9690 1.04 0.19 1.00 1.00 0.00
## min max range skew kurtosis se
## NAME 1745.00 168519.00 166774.00 -0.28 -1.03 423.28
## SCORE 4.00 50.00 46.00 -0.26 -1.26 0.10
## MEASURE -1.91 5.30 7.21 0.83 3.02 0.01
## IN.MSQ 0.70 1.47 0.77 0.19 0.23 0.00
## OUT.MSQ 0.30 4.45 4.16 1.70 23.52 0.00
## Certified 2.00 49.00 47.00 0.33 -0.84 0.11
## Age 28.00 88.00 60.00 0.14 -0.84 0.11
## OB_degree 1.00 3.00 2.00 6.52 63.78 0.00
## Confidence 1.00 6.00 5.00 0.26 -0.02 0.01
## Gender* Inf -Inf -Inf NA NA NA
## NameDegree* Inf -Inf -Inf NA NA NA
## OB_indicator* 1.00 2.00 1.00 4.77 20.74 0.00
# Age: Mean = 49.2; sd = 10.52; Ranged from 28 to 88.
# Score: Mean = 29.48; sd = 9.93; Ranged from 4 to 50.
# Certified Year: Mean = 19.36; sd = 10.53; Ranged from 2 to 49.
# Measures: Mean = 1.05, sd = 0.66, Ranged from -1.91 to 5.30 on logit scale.
# OB_degree: Mean = 1.05, sd = 0.13, Ranged from 1 to 3.
# Confidence: Mean = 3.06, sd = 0.81, Ranged from 1 to 6.
library(jmv)
# Use jmv's descriptives() to summarise the person-level variables and to
# request frequency tables (freq = TRUE)
descriptives(
  data = open_book_rasch,
  vars = vars(Age, SCORE, Certified, OB_degree, Confidence, OB_indicator),
  freq = TRUE
)
##
## DESCRIPTIVES
##
## Descriptives
## ────────────────────────────────────────────────────────────────────────────────────
## Age SCORE Certified OB_degree Confidence OB_indicator
## ────────────────────────────────────────────────────────────────────────────────────
## N 9690 9690 9666 9690 9690 9690
## Missing 0 0 24 0 0 0
## Mean 49.2 29.5 19.4 1.05 3.06
## Median 49.0 32.0 18.0 1.00 3.02
## Minimum 28.0 4.00 2.00 1.00 1.00
## Maximum 88.0 50.0 49.0 3.00 6.00
## ────────────────────────────────────────────────────────────────────────────────────
##
##
## FREQUENCIES
##
## Frequencies of OB_indicator
## ──────────────────────────────────────────────────
## Levels Counts % of Total Cumulative %
## ──────────────────────────────────────────────────
## No 9313 96.1 96.1
## Yes 377 3.9 100.0
## ──────────────────────────────────────────────────
# Frequency tables for the categorical variables of the response-level data
descriptives(
  data = open_book,
  vars = vars(NameDegree, AssistanceDesc, ConfidenceDesc),
  freq = TRUE
)
##
## DESCRIPTIVES
##
## Descriptives
## ─────────────────────────────────────────────────────────────
## NameDegree AssistanceDesc ConfidenceDesc
## ─────────────────────────────────────────────────────────────
## N 401100 401100 401100
## Missing 0 0 0
## Mean
## Median
## Minimum
## Maximum
## ─────────────────────────────────────────────────────────────
##
##
## FREQUENCIES
##
## Frequencies of NameDegree
## ──────────────────────────────────────────────────
## Levels Counts % of Total Cumulative %
## ──────────────────────────────────────────────────
## DO 39600 9.9 9.9
## MD 361500 90.1 100.0
## ──────────────────────────────────────────────────
##
##
## Frequencies of AssistanceDesc
## ───────────────────────────────────────────────────────────────────────────────────
## Levels Counts % of Total Cumulative %
## ───────────────────────────────────────────────────────────────────────────────────
## I answered it without assistance 385198 96.0 96.0
## I looked up the answer 12958 3.2 99.3
## I collaborated with a colleague 2470 0.6 99.9
## I collaborated and looked up the answer 474 0.1 100.0
## ───────────────────────────────────────────────────────────────────────────────────
##
##
## Frequencies of ConfidenceDesc
## ────────────────────────────────────────────────────────────────
## Levels Counts % of Total Cumulative %
## ────────────────────────────────────────────────────────────────
## Extremely Confident 25138 6.3 6.3
## Very Confident 38572 9.6 15.9
## Pretty Confident 70353 17.5 33.4
## Moderately Confident 114044 28.4 61.9
## Slightly Confident 106017 26.4 88.3
## Not at all Confident 46976 11.7 100.0
## ────────────────────────────────────────────────────────────────
# Histogram of the per-physician mean confidence score
hist(open_book_rasch$Confidence)
# Histogram of age
hist(open_book_rasch$Age) # The shape of this histogram may depend strongly on the number of bins used
# Kernel density estimate of age, as a complement to the histogram
plot(density(open_book_rasch$Age))
# Histogram and kernel density estimate of the per-physician mean assistance degree
hist(open_book_rasch$OB_degree)
plot(density(open_book_rasch$OB_degree))
# Frequency table (not a histogram) of educational degree
table(open_book_rasch$NameDegree)
##
## DO MD
## 964 8726
# Scatter plot with confidence on the x-axis and age on the y-axis
library(car)
scatterplot(
  Age ~ Confidence,
  data = open_book_rasch,
  xlab = "Confidence",
  ylab = "Age",
  main = "Scatter Plot of Age VS. Confidence"
)
It looks like there is no strong relationship between confidence and age.
However, there is a slight trend: as age increases, the confidence level also increases slightly.
# Prepare the dataset: keep the numeric columns for the correlation matrix.
# Columns are selected by name instead of by position (2:9) so the selection
# cannot silently pick the wrong variables if the column order changes.
open_book_rasch_correlation <- open_book_rasch[
  ,
  c(
    "SCORE", "MEASURE", "IN.MSQ", "OUT.MSQ",
    "Certified", "Age", "OB_degree", "Confidence"
  )
]
# NOTE: attaching Hmisc after psych means that later unqualified describe()
# calls use Hmisc's describe() rather than psych's.
library("Hmisc")
# Correlation matrix together with the pairwise n and p-value tables
rcorr(as.matrix(open_book_rasch_correlation))
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree
## SCORE 1.00 0.48 -0.04 -0.04 0.16 0.10 0.03
## MEASURE 0.48 1.00 -0.07 -0.07 0.12 0.05 0.17
## IN.MSQ -0.04 -0.07 1.00 0.83 -0.17 -0.14 0.03
## OUT.MSQ -0.04 -0.07 0.83 1.00 -0.15 -0.11 0.05
## Certified 0.16 0.12 -0.17 -0.15 1.00 0.90 -0.03
## Age 0.10 0.05 -0.14 -0.11 0.90 1.00 -0.02
## OB_degree 0.03 0.17 0.03 0.05 -0.03 -0.02 1.00
## Confidence 0.13 0.27 -0.04 -0.03 0.04 0.03 0.09
## Confidence
## SCORE 0.13
## MEASURE 0.27
## IN.MSQ -0.04
## OUT.MSQ -0.03
## Certified 0.04
## Age 0.03
## OB_degree 0.09
## Confidence 1.00
##
## n
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree
## SCORE 9690 9690 9690 9690 9666 9690 9690
## MEASURE 9690 9690 9690 9690 9666 9690 9690
## IN.MSQ 9690 9690 9690 9690 9666 9690 9690
## OUT.MSQ 9690 9690 9690 9690 9666 9690 9690
## Certified 9666 9666 9666 9666 9666 9666 9666
## Age 9690 9690 9690 9690 9666 9690 9690
## OB_degree 9690 9690 9690 9690 9666 9690 9690
## Confidence 9690 9690 9690 9690 9666 9690 9690
## Confidence
## SCORE 9690
## MEASURE 9690
## IN.MSQ 9690
## OUT.MSQ 9690
## Certified 9666
## Age 9690
## OB_degree 9690
## Confidence 9690
##
## P
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree
## SCORE 0.0000 0.0001 0.0000 0.0000 0.0000 0.0008
## MEASURE 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## IN.MSQ 0.0001 0.0000 0.0000 0.0000 0.0000 0.0135
## OUT.MSQ 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
## Certified 0.0000 0.0000 0.0000 0.0000 0.0000 0.0017
## Age 0.0000 0.0000 0.0000 0.0000 0.0000 0.0306
## OB_degree 0.0008 0.0000 0.0135 0.0000 0.0017 0.0306
## Confidence 0.0000 0.0000 0.0004 0.0094 0.0002 0.0076 0.0000
## Confidence
## SCORE 0.0000
## MEASURE 0.0000
## IN.MSQ 0.0004
## OUT.MSQ 0.0094
## Certified 0.0002
## Age 0.0076
## OB_degree 0.0000
## Confidence
Interpretation
The largest correlation in this table is between Age and Certified year (r=0.90), followed by outfit vs. infit (r=0.83).
# Binary logistic regression of OB_indicator on MEASURE, Certified,
# Confidence, SCORE and NameDegree, fitted on the full person-level data
mylogit <- glm(
  OB_indicator ~ MEASURE + Certified + Confidence + SCORE + NameDegree,
  data = open_book_rasch,
  family = "binomial"
)
# Check the result of our model
summary(mylogit)
##
## Call:
## glm(formula = OB_indicator ~ MEASURE + Certified + Confidence +
## SCORE + NameDegree, family = "binomial", data = open_book_rasch)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9488 -0.2989 -0.2588 -0.2271 2.9030
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.314529 0.284097 -11.667 < 2e-16 ***
## MEASURE 0.657261 0.072098 9.116 < 2e-16 ***
## Certified -0.016367 0.005253 -3.116 0.00183 **
## Confidence 0.074983 0.066512 1.127 0.25959
## SCORE -0.012403 0.005595 -2.217 0.02663 *
## NameDegreeMD -0.257804 0.165410 -1.559 0.11910
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3172.3 on 9665 degrees of freedom
## Residual deviance: 3077.1 on 9660 degrees of freedom
## (24 observations deleted due to missingness)
## AIC: 3089.1
##
## Number of Fisher Scoring iterations: 6
For every one-unit increase in MEASURE, the log odds of using the open-book (versus closed-book) strategy increase by 0.657 (P<0.01).
For every one-unit increase in Certified, the log odds of using the open-book (versus closed-book) strategy decrease by 0.016 (P<0.01).
For every one-unit increase in SCORE, the log odds of using the open-book (versus closed-book) strategy decrease by 0.012 (P=0.027).
Prepare the dataset
## Build a balanced data set: all open-book physicians plus an equally sized
## random sample drawn from the closed-book pool
set.seed(12346)
close_book_pool <- open_book_rasch[which(open_book_rasch$OB_indicator == "No"), ]
open_book_pool <- open_book_rasch[which(open_book_rasch$OB_indicator == "Yes"), ]
## Draw as many closed-book rows as there are open-book rows instead of
## hard-coding the count (377 for the data used here); seq_len() also stays
## correct if the pool were empty, unlike 1:nrow().
close_book_sampling <- sample(
  seq_len(nrow(close_book_pool)),
  nrow(open_book_pool)
)
close_book <- close_book_pool[close_book_sampling, ]
balanced_data <- rbind(open_book_pool, close_book)
## psych and Hmisc are both attached and both provide describe(); qualify the
## call to make explicit that the Hmisc version produces this summary.
Hmisc::describe(balanced_data)
## balanced_data
##
## 12 Variables 754 Observations
## ---------------------------------------------------------------------------
## NAME
## n missing distinct Info Mean Gmd .05 .10
## 754 0 754 1 105266 46244 35755 44445
## .25 .50 .75 .90 .95
## 75250 109978 138397 157595 163092
##
## lowest : 8667 8693 8849 11730 14670, highest: 167630 167794 168029 168041 168176
## ---------------------------------------------------------------------------
## SCORE
## n missing distinct Info Mean Gmd .05 .10
## 754 0 42 0.999 29.75 11.74 15.00 16.00
## .25 .50 .75 .90 .95
## 19.00 32.00 38.75 42.00 44.00
##
## lowest : 9 10 11 12 13, highest: 46 47 48 49 50
## ---------------------------------------------------------------------------
## MEASURE
## n missing distinct Info Mean Gmd .05 .10
## 754 0 61 0.999 1.21 0.8138 0.21 0.44
## .25 .50 .75 .90 .95
## 0.72 1.13 1.60 2.02 2.49
##
## lowest : -0.63 -0.58 -0.49 -0.40 -0.35, highest: 3.45 4.08 4.50 4.69 5.30
## ---------------------------------------------------------------------------
## IN.MSQ
## n missing distinct Info Mean Gmd .05 .10
## 754 0 673 1 1.001 0.1029 0.8584 0.8883
## .25 .50 .75 .90 .95
## 0.9405 0.9977 1.0590 1.1170 1.1640
##
## lowest : 0.7052 0.7345 0.7576 0.7621 0.7737, highest: 1.2345 1.2360 1.2364 1.2823 1.3362
## ---------------------------------------------------------------------------
## OUT.MSQ
## n missing distinct Info Mean Gmd .05 .10
## 754 0 695 1 0.994 0.1965 0.7437 0.7939
## .25 .50 .75 .90 .95
## 0.8815 0.9789 1.0896 1.2169 1.2767
##
## lowest : 0.3317 0.3605 0.4010 0.4403 0.5263, highest: 1.5758 1.6520 1.6735 1.7187 4.4517
## ---------------------------------------------------------------------------
## Certified
## n missing distinct Info Mean Gmd .05 .10
## 752 2 42 0.999 18.78 11.69 4 5
## .25 .50 .75 .90 .95
## 10 18 26 34 37
##
## lowest : 3 4 5 6 7, highest: 40 41 42 44 45
## ---------------------------------------------------------------------------
## Age
## n missing distinct Info Mean Gmd .05 .10
## 754 0 48 0.999 48.85 11.66 33 35
## .25 .50 .75 .90 .95
## 41 49 57 62 65
##
## lowest : 28 29 30 31 32, highest: 71 72 73 74 88
## ---------------------------------------------------------------------------
## OB_degree
## n missing distinct Info Mean Gmd .05 .10
## 754 0 61 0.968 1.173 0.2423 1.00 1.00
## .25 .50 .75 .90 .95
## 1.00 1.06 1.22 1.44 1.64
##
## lowest : 1.00 1.02 1.04 1.06 1.08, highest: 2.84 2.92 2.94 2.98 3.00
## ---------------------------------------------------------------------------
## Confidence
## n missing distinct Info Mean Gmd .05 .10
## 754 0 175 1 3.113 0.9339 1.833 2.040
## .25 .50 .75 .90 .95
## 2.520 3.120 3.680 4.220 4.567
##
## lowest : 1.00 1.06 1.12 1.16 1.24, highest: 5.12 5.20 5.28 5.32 5.64
## ---------------------------------------------------------------------------
## Gender
## n missing distinct
## 754 0 2
##
## Value F M
## Frequency 375 379
## Proportion 0.497 0.503
## ---------------------------------------------------------------------------
## NameDegree
## n missing distinct
## 754 0 2
##
## Value DO MD
## Frequency 92 662
## Proportion 0.122 0.878
## ---------------------------------------------------------------------------
## OB_indicator
## n missing distinct
## 754 0 2
##
## Value No Yes
## Frequency 377 377
## Proportion 0.5 0.5
## ---------------------------------------------------------------------------
# Score: Mean = 29.75
# Measure:Mean = 1.21
# Gender: Male = 50.3%; Female=49.7%
# OB_degree: Mean = 1.173
# Confidence: Mean = 3.11
# Same descriptive summary as for the full sample, now on the balanced data
descriptives(
  data = balanced_data,
  vars = vars(Age, SCORE, Certified, OB_degree, Confidence, OB_indicator),
  freq = TRUE
)
##
## DESCRIPTIVES
##
## Descriptives
## ────────────────────────────────────────────────────────────────────────────────────
## Age SCORE Certified OB_degree Confidence OB_indicator
## ────────────────────────────────────────────────────────────────────────────────────
## N 754 754 752 754 754 754
## Missing 0 0 2 0 0 0
## Mean 48.8 29.7 18.8 1.17 3.11
## Median 49.0 32.0 18.0 1.06 3.12
## Minimum 28.0 9.00 3.00 1.00 1.00
## Maximum 88.0 50.0 45.0 3.00 5.64
## ────────────────────────────────────────────────────────────────────────────────────
##
##
## FREQUENCIES
##
## Frequencies of OB_indicator
## ──────────────────────────────────────────────────
## Levels Counts % of Total Cumulative %
## ──────────────────────────────────────────────────
## No 377 50.0 50.0
## Yes 377 50.0 100.0
## ──────────────────────────────────────────────────
## Histograms and density plots for the balanced data
# Histogram of the per-physician mean confidence score
hist(balanced_data$Confidence)
# Histogram of age
hist(balanced_data$Age) # The shape of this histogram may depend strongly on the number of bins used
# Kernel density estimate of age, as a complement to the histogram
plot(density(balanced_data$Age))
# Histogram and kernel density estimate of the per-physician mean assistance degree
hist(balanced_data$OB_degree)
plot(density(balanced_data$OB_degree))
# Frequency table (not a histogram) of educational degree
table(balanced_data$NameDegree)
##
## DO MD
## 92 662
# Scatter plot with confidence on the x-axis and age on the y-axis
scatterplot(
  Age ~ Confidence,
  data = balanced_data,
  xlab = "Confidence",
  ylab = "Age",
  main = "Scatter Plot of Age VS. Confidence"
)
# Prepare the dataset: keep the numeric columns for the correlation matrix.
# Columns are selected by name instead of by position (2:9) so the selection
# cannot silently pick the wrong variables if the column order changes.
open_book_rasch_correlation_balanced <- balanced_data[
  ,
  c(
    "SCORE", "MEASURE", "IN.MSQ", "OUT.MSQ",
    "Certified", "Age", "OB_degree", "Confidence"
  )
]
# Correlation matrix together with the pairwise n and p-value tables
rcorr(as.matrix(open_book_rasch_correlation_balanced))
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree
## SCORE 1.00 0.43 -0.05 -0.05 0.13 0.06 0.14
## MEASURE 0.43 1.00 -0.09 -0.04 0.07 -0.01 0.39
## IN.MSQ -0.05 -0.09 1.00 0.69 -0.21 -0.18 0.05
## OUT.MSQ -0.05 -0.04 0.69 1.00 -0.11 -0.09 0.09
## Certified 0.13 0.07 -0.21 -0.11 1.00 0.89 -0.09
## Age 0.06 -0.01 -0.18 -0.09 0.89 1.00 -0.08
## OB_degree 0.14 0.39 0.05 0.09 -0.09 -0.08 1.00
## Confidence 0.14 0.29 -0.02 0.03 0.00 -0.02 0.19
## Confidence
## SCORE 0.14
## MEASURE 0.29
## IN.MSQ -0.02
## OUT.MSQ 0.03
## Certified 0.00
## Age -0.02
## OB_degree 0.19
## Confidence 1.00
##
## n
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree Confidence
## SCORE 754 754 754 754 752 754 754 754
## MEASURE 754 754 754 754 752 754 754 754
## IN.MSQ 754 754 754 754 752 754 754 754
## OUT.MSQ 754 754 754 754 752 754 754 754
## Certified 752 752 752 752 752 752 752 752
## Age 754 754 754 754 752 754 754 754
## OB_degree 754 754 754 754 752 754 754 754
## Confidence 754 754 754 754 752 754 754 754
##
## P
## SCORE MEASURE IN.MSQ OUT.MSQ Certified Age OB_degree
## SCORE 0.0000 0.1603 0.1745 0.0003 0.1106 0.0001
## MEASURE 0.0000 0.0161 0.2650 0.0483 0.8200 0.0000
## IN.MSQ 0.1603 0.0161 0.0000 0.0000 0.0000 0.1433
## OUT.MSQ 0.1745 0.2650 0.0000 0.0021 0.0168 0.0096
## Certified 0.0003 0.0483 0.0000 0.0021 0.0000 0.0140
## Age 0.1106 0.8200 0.0000 0.0168 0.0000 0.0240
## OB_degree 0.0001 0.0000 0.1433 0.0096 0.0140 0.0240
## Confidence 0.0002 0.0000 0.5972 0.4846 0.9650 0.6499 0.0000
## Confidence
## SCORE 0.0002
## MEASURE 0.0000
## IN.MSQ 0.5972
## OUT.MSQ 0.4846
## Certified 0.9650
## Age 0.6499
## OB_degree 0.0000
## Confidence
Besides the high correlations between infit and outfit (r=0.69) and between Age and Certified (r=0.89), some notable correlations show up: Measure & OB_degree (r=0.39), Confidence & Measure (r=0.29), and Measure & Score (r=0.43).
From the correlation matrix, we can tell that a more competent participant (higher Measure) is also more likely to use the open-book strategy.
# Same logistic regression as for the full sample, fitted on the balanced data
mylogit_balanced <- glm(
  OB_indicator ~ MEASURE + Certified + Confidence + SCORE + NameDegree,
  data = balanced_data,
  family = "binomial"
)
# Check the result of our model
summary(mylogit_balanced)
##
## Call:
## glm(formula = OB_indicator ~ MEASURE + Certified + Confidence +
## SCORE + NameDegree, family = "binomial", data = balanced_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2421 -1.1185 -0.7776 1.1567 1.6190
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.300538 0.406605 -0.739 0.45982
## MEASURE 0.577326 0.125579 4.597 4.28e-06 ***
## Certified -0.023017 0.007606 -3.026 0.00248 **
## Confidence 0.107057 0.095071 1.126 0.26014
## SCORE -0.010670 0.008245 -1.294 0.19564
## NameDegreeMD 0.026481 0.233135 0.114 0.90956
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1042.5 on 751 degrees of freedom
## Residual deviance: 1003.2 on 746 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 1015.2
##
## Number of Fisher Scoring iterations: 4
In this binary logistic regression there are only two significant predictors, MEASURE and Certified. SCORE is no longer significant.
For every one-unit increase in Measure, the log odds of using the open-book (versus closed-book) strategy increase by 0.577 (P<0.01).
For every one-unit increase in certified year, the log odds of using the open-book (versus closed-book) strategy decrease by 0.023 (P=0.002).
# Refit the balanced-data model with MEASURE left out of the predictors
mylogit_balanced_SCORE <- glm(
  OB_indicator ~ Certified + Confidence + SCORE + NameDegree,
  data = balanced_data,
  family = "binomial"
)
# Check the result of our model
summary(mylogit_balanced_SCORE)
##
## Call:
## glm(formula = OB_indicator ~ Certified + Confidence + SCORE +
## NameDegree, family = "binomial", data = balanced_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4781 -1.1531 -0.8928 1.1651 1.5212
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.564489 0.396203 -1.425 0.15423
## Certified -0.021265 0.007440 -2.858 0.00426 **
## Confidence 0.220921 0.090908 2.430 0.01509 *
## SCORE 0.006087 0.007306 0.833 0.40477
## NameDegreeMD 0.102072 0.228403 0.447 0.65495
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1042.5 on 751 degrees of freedom
## Residual deviance: 1027.4 on 747 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 1037.4
##
## Number of Fisher Scoring iterations: 4
This time, Score is still not a significant predictor of using the open-book strategy. The confidence level and certified year are significant predictors of the open-book decision.
For every one-unit increase in Confidence, the log odds of using the open-book (versus closed-book) strategy increase by 0.221 (P=0.015).
For every one-unit increase in Certified, the log odds of using the open-book (versus closed-book) strategy decrease by 0.021 (P<0.01).
# Drop every row that has a missing value to get a complete-case data set
balanced_data_noNA <- na.omit(balanced_data)
# MANOVA with SCORE, MEASURE, Certified, Age and Confidence as the joint
# responses and OB_indicator as the grouping factor
res.man <- manova(
  cbind(SCORE, MEASURE, Certified, Age, Confidence) ~ OB_indicator,
  data = balanced_data_noNA
)
# Based on the omnibus test, there is a difference between the open- and
# closed-book groups on these selected variables.
summary(res.man)
## Df Pillai approx F num Df den Df Pr(>F)
## OB_indicator 1 0.050532 7.9406 5 746 2.654e-07 ***
## Residuals 750
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Follow up with one univariate ANOVA per response to see which ones differ
summary.aov(object = res.man)
## Response SCORE :
## Df Sum Sq Mean Sq F value Pr(>F)
## OB_indicator 1 69 68.848 0.6492 0.4207
## Residuals 750 79539 106.052
##
## Response MEASURE :
## Df Sum Sq Mean Sq F value Pr(>F)
## OB_indicator 1 15.45 15.4458 25.398 5.851e-07 ***
## Residuals 750 456.12 0.6082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Certified :
## Df Sum Sq Mean Sq F value Pr(>F)
## OB_indicator 1 781 781.14 7.5334 0.006201 **
## Residuals 750 77768 103.69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Age :
## Df Sum Sq Mean Sq F value Pr(>F)
## OB_indicator 1 538 538.35 5.2353 0.02241 *
## Residuals 750 77122 102.83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Confidence :
## Df Sum Sq Mean Sq F value Pr(>F)
## OB_indicator 1 4.46 4.4592 6.6175 0.01029 *
## Residuals 750 505.39 0.6738
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the follow-up ANOVAs, we can see that all the input variables except SCORE (p=0.42) differ between the open- and closed-book groups. We then run unpaired t-tests to see the mean difference for each variable.
# Unpaired t-test (unequal variances) of MEASURE: closed-book sample vs. open-book group
t.test(
  x = close_book$MEASURE,
  y = open_book_pool$MEASURE,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: close_book$MEASURE and open_book_pool$MEASURE
## t = -4.9697, df = 710.07, p-value = 8.411e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3938713 -0.1707971
## sample estimates:
## mean of x mean of y
## 1.068886 1.351220
# Unpaired t-test (unequal variances) of SCORE: closed-book sample vs. open-book group
t.test(
  x = close_book$SCORE,
  y = open_book_pool$SCORE,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: close_book$SCORE and open_book_pool$SCORE
## t = -0.75331, df = 751.22, p-value = 0.4515
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.0373499 0.9073765
## sample estimates:
## mean of x mean of y
## 29.46684 30.03183
# Unpaired t-test (unequal variances) of Certified: closed-book sample vs. open-book group
t.test(
  x = close_book$Certified,
  y = open_book_pool$Certified,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: close_book$Certified and open_book_pool$Certified
## t = 2.7455, df = 742.41, p-value = 0.006188
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.580836 3.495953
## sample estimates:
## mean of x mean of y
## 19.80106 17.76267
# Unpaired t-test (unequal variances) of Age: closed-book sample vs. open-book group
t.test(
  x = close_book$Age,
  y = open_book_pool$Age,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: close_book$Age and open_book_pool$Age
## t = 2.3424, df = 736.13, p-value = 0.01942
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2799885 3.1788975
## sample estimates:
## mean of x mean of y
## 49.71088 47.98143
# Unpaired t-test (unequal variances) of Confidence: closed-book sample vs. open-book group
t.test(
  x = close_book$Confidence,
  y = open_book_pool$Confidence,
  alternative = "two.sided",
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: close_book$Confidence and open_book_pool$Confidence
## t = -2.5486, df = 748.27, p-value = 0.01101
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.26981342 -0.03501417
## sample estimates:
## mean of x mean of y
## 3.037082 3.189496