library(ltm)
library(rio)
library(psych)
library(cocron)

# Question 1
bfi <- import("bfi.sav")

bfi$agree <- rowSums(bfi[,1:5])
bfi$conscien <- rowSums(bfi[, 6:10])

# comparing girls and boys
t.test(bfi$agree~bfi$gender, var.equal=T) # sub-question a
## 
##  Two Sample t-test
## 
## data:  bfi$agree by bfi$gender
## t = -5.5224, df = 2234, p-value = 3.733e-08
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -0.3217758 -0.1531330
## sample estimates:
## mean in group 1 mean in group 2 
##        3.438095        3.675550
# One-way analysis of variance  # sub-question b
fit <- aov(bfi$conscien~as.factor(bfi$education))
summary(fit)
##                            Df Sum Sq Mean Sq F value  Pr(>F)   
## as.factor(bfi$education)    4   11.7   2.913   3.518 0.00718 **
## Residuals                2231 1847.2   0.828                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Question 2
t.test(bfi$agree, bfi$conscien, paired = TRUE, var.equal = TRUE)
## 
##  Paired t-test
## 
## data:  bfi$agree and bfi$conscien
## t = 17.233, df = 2235, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.4121894 0.5180431
## sample estimates:
## mean difference 
##       0.4651163
# Question 3
fit.1PL<-rasch(bfi[,1:5]) # one parameter logistic model
summary(fit.1PL)
## 
## Call:
## rasch(data = bfi[, 1:5])
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -4945.139 9902.277 9936.552
## 
## Coefficients:
##             value std.err   z.vals
## Dffclt.A1  1.7866  0.1097  16.2862
## Dffclt.A2 -2.8746  0.1647 -17.4514
## Dffclt.A3 -2.3059  0.1347 -17.1177
## Dffclt.A4 -2.1414  0.1266 -16.9166
## Dffclt.A5 -2.1835  0.1286 -16.9737
## Dscrmn     0.8077  0.0476  16.9754
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.019 
## quasi-Newton: BFGS
fit.2PL <-ltm(bfi[,1:5]~z1)  # two parameter logiatic model
summary(fit.2PL)
## 
## Call:
## ltm(formula = bfi[, 1:5] ~ z1)
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -4651.286 9322.572 9379.697
## 
## Coefficients:
##             value std.err   z.vals
## Dffclt.A1  1.8614  0.2583   7.2072
## Dffclt.A2  1.6891  0.2538   6.6559
## Dffclt.A3  1.2419  0.2206   5.6298
## Dffclt.A4  1.6767  0.2169   7.7303
## Dffclt.A5  1.3036  0.1892   6.8894
## Dscrmn.A1  0.7670  0.0855   8.9656
## Dscrmn.A2 -1.8371  0.1778 -10.3297
## Dscrmn.A3 -2.3360  0.2493  -9.3690
## Dscrmn.A4 -1.1267  0.1064 -10.5931
## Dscrmn.A5 -1.8170  0.1680 -10.8128
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Convergence: 0 
## max(|grad|): 0.00036 
## quasi-Newton: BFGS
fit.3PL <- tpm(bfi[,1:5])
summary(fit.3PL)
## 
## Call:
## tpm(data = bfi[, 1:5])
## 
## Model Summary:
##    log.Lik      AIC      BIC
##  -4650.379 9330.759 9416.446
## 
## Coefficients:
##             value std.err   z.vals
## Gussng.A1  0.1042  0.0469   2.2219
## Gussng.A2  0.1882  0.2563   0.7344
## Gussng.A3  0.0000  0.0018   0.0131
## Gussng.A4  0.0003  0.0226   0.0113
## Gussng.A5  0.0000  0.0006   0.0010
## Dffclt.A1  1.9048  1.3076   1.4567
## Dffclt.A2  1.4259  0.3543   4.0243
## Dffclt.A3  1.2443  0.2165   5.7485
## Dffclt.A4  1.6741  0.2167   7.7254
## Dffclt.A5  1.3029  0.1875   6.9480
## Dscrmn.A1  1.2890  0.4636   2.7804
## Dscrmn.A2 -2.1018  0.5193  -4.0475
## Dscrmn.A3 -2.3234  0.2430  -9.5601
## Dscrmn.A4 -1.1291  0.1072 -10.5276
## Dscrmn.A5 -1.8194  0.1667 -10.9152
## 
## Integration:
## method: Gauss-Hermite
## quadrature points: 21 
## 
## Optimization:
## Optimizer: optim (BFGS)
## Convergence: 0 
## max(|grad|): 0.024
# comparing models
anova(fit.1PL, fit.2PL)
## 
##  Likelihood Ratio Table
##             AIC     BIC  log.Lik    LRT df p.value
## fit.1PL 9902.28 9936.55 -4945.14                  
## fit.2PL 9322.57 9379.70 -4651.29 587.71  4  <0.001
anova(fit.2PL, fit.3PL)
## 
##  Likelihood Ratio Table
##             AIC     BIC  log.Lik  LRT df p.value
## fit.2PL 9322.57 9379.70 -4651.29                
## fit.3PL 9330.76 9416.45 -4650.38 1.81  5   0.874
# two-parameter model has the best fit with the data.
# hardest item is A1, and the easiest item is A3
# item with highest discrimination is A1 and item with lowest discrimination is A3.

# Question 4
# alpha for agreeableness subscale
psych::alpha(bfi[,1:5], check.keys=TRUE)$total$raw_alpha
## [1] 0.5758353
# alpha for conscientionous subscale
psych::alpha(bfi[,6:10], check.keys=TRUE)$total$raw_alpha
## [1] 0.6196014
# comparing two alpha
cocron::cocron.two.coefficients(alpha=c(.5758, .6196), n=nrow(bfi),dep=TRUE, r=.0725)
## 
##   Compare two alpha coefficients
## 
## Comparison between: a1 = 0.5758, a2 = 0.6196
## The coefficients are based on dependent groups
## Group sizes: n1 = 2236, n2 = 2236
## Null hypothesis: a1 is equal to a2
## Alternative hypothesis: a1 is not equal to a2 (two-sided)
## Level of significance: 0.05
## 
## t = 2.5836, df = 2234, p-value = 0.0098
## Null hypothesis rejected