# Load the combined dataset (the file name indicates it includes the LPA
# results). Empty and single-space cells, as well as the literal string "NA",
# are read as missing; text columns stay character rather than factor.
imp <- read.csv(
  file = "complete_dataset_all_data_and_LPA.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = c("", " ", "NA")
)
# ##FTP items
# ftp1 <- d[ , grepl( "Q6.1" , names( d ) ) ]
# ftp2 <- d[ , grepl( "Q9.1" , names( d ) ) ]
# ftp <- cbind(ftp1,ftp2)
# rm(ftp1, ftp2)
# nfja <- (ftp$Q6.1_6 + ftp$Q6.1_11 + ftp$Q9.1_3 + ftp$Q9.1_5 + ftp$Q6.1_7 + ftp$Q9.1_4)/6
# pie <- (ftp$Q9.1_10 + ftp$Q9.1_11 + ftp$Q9.1_12 + ftp$Q9.1_13)/4
# s <- (ftp$Q6.1_2 + ftp$Q6.1_3 + ftp$Q6.1_4)/3
# cc <- (ftp$Q9.1_7 + ftp$Q9.1_8 + ftp$Q9.1_9)/3
# rfps <- (ftp$Q9.1_1 + ftp$Q9.1_2)/2
# mf <- (ftp$Q9.1_6 + ftp$Q9.1_10)/2
#
# ftp_m <- cbind(nfja,pie,s,cc,rfps,mf)
# ftp_m <- as.data.frame(ftp_m)
# ftp_m$tot <- rowMeans(ftp_m, na.rm=T)
# ftp_m <- cbind(ftp_m,summ)
# ftp_m$summ <- as.factor(ftp_m$summ)
# tests <- head(colnames(ftp_m), n=-1)
# rm(nfja,pie,s,cc,rfps,mf)
# ftp_m <- na.omit(ftp_m)
#
# ##identity items
# sid <- d[ , grepl( "Q3.1" , names( d ) ) ]
# eid <- d[ , grepl( "Q4.1" , names( d ) ) ]
# rid <- d[ , grepl( "Q5.1" , names( d ) ) ]
#
# s_rec <- (sid$Q3.1_2 + sid$Q3.1_3 + sid$Q3.1_4 + sid$Q3.1_5 + sid$Q3.1_6 + sid$Q3.1_7)/6
# s_pc <- (sid$Q3.1_11 + sid$Q3.1_12 + sid$Q3.1_13 + sid$Q3.1_14 + sid$Q3.1_15)/5
# s_int <- (sid$Q3.1_8 + sid$Q3.1_9 + sid$Q3.1_10)/3
# e_rec <- (eid$Q4.1_2 + eid$Q4.1_3 + eid$Q4.1_4 + eid$Q4.1_6 + eid$Q4.1_7)/5
# e_pc <- (eid$Q4.1_11 + eid$Q4.1_12 + eid$Q4.1_13 + eid$Q4.1_14 + eid$Q4.1_10)/5
# e_int <- (eid$Q4.1_8 + eid$Q4.1_9 + eid$Q4.1_5)/3
# r_rec <- (rid$Q5.1_2 + rid$Q5.1_3 + rid$Q5.1_4 + rid$Q5.1_5 + rid$Q5.1_6 + rid$Q5.1_7)/6
# r_pc <- (rid$Q5.1_16 + rid$Q5.1_12 + rid$Q5.1_13 + rid$Q5.1_14 + rid$Q5.1_15)/5
# r_int <- (rid$Q5.1_8 + rid$Q5.1_9 + rid$Q5.1_10 + rid$Q5.1_11)/4
#
# id_m <- cbind(s_rec,s_pc,s_int,e_rec,e_pc,e_int,r_rec,r_pc,r_int)
# id_m <- as.data.frame(id_m)
# id_m$s_tot <- (id_m$s_rec + id_m$s_pc + id_m$s_int)/3
# id_m$e_tot <- (id_m$e_rec + id_m$e_pc + id_m$e_int)/3
# id_m$r_tot <- (id_m$r_rec + id_m$r_pc + id_m$r_int)/3
# id_m$rec_tot <- (id_m$s_rec + id_m$e_rec + id_m$r_rec)/3
# id_m$pc_tot <- (id_m$s_pc + id_m$e_pc + id_m$r_pc)/3
# id_m$int_tot <- (id_m$s_int + id_m$e_int + id_m$r_int)/3
# id_m$tot <- (id_m$s_tot + id_m$e_tot + id_m$r_tot)/3
#
# id_m <- id_m[,c(1,2,3,10,4,5,6,11,7,8,9,12,13,14,15,16)]
#
# id_m <- cbind(id_m,summ)
# id_m$summ <- as.factor(id_m$summ)
# tests <- head(colnames(id_m), n=-1)
# rm(list=ls()[!(ls() %in% c('inscode','d','summ','id_m','tests'))])
# id_m <- na.omit(id_m)
#
# ##IBM items
# ibm_sal <- d[ , grepl( "Q11" , names( d ) ) ]
# List every column of the imported data with its position.
colnames(imp)
## [1] "R_IDs" "X" "Progress"
## [4] "Finished" "Q1.1" "Q6.1_1"
## [7] "Q6.1_2" "Q6.1_3" "Q6.1_4"
## [10] "Q6.1_5" "Q6.1_6" "Q6.1_7"
## [13] "Q6.1_8" "Q6.1_9" "Q6.1_10"
## [16] "Q6.1_11" "Q9.1_1" "Q9.1_2"
## [19] "Q9.1_3" "Q9.1_4" "Q9.1_5"
## [22] "Q9.1_6" "Q9.1_7" "Q9.1_8"
## [25] "Q9.1_9" "Q9.1_10" "Q9.1_11"
## [28] "Q9.1_12" "Q9.1_13" "Q9.1_14"
## [31] "Q3.1_1" "Q3.1_2" "Q3.1_3"
## [34] "Q3.1_4" "Q3.1_5" "Q3.1_6"
## [37] "Q3.1_7" "Q3.1_8" "Q3.1_9"
## [40] "Q3.1_10" "Q3.1_11" "Q3.1_12"
## [43] "Q3.1_13" "Q3.1_14" "Q3.1_15"
## [46] "Q4.1_1" "Q4.1_2" "Q4.1_3"
## [49] "Q4.1_4" "Q4.1_5" "Q4.1_6"
## [52] "Q4.1_7" "Q4.1_8" "Q4.1_9"
## [55] "Q4.1_10" "Q4.1_11" "Q4.1_12"
## [58] "Q4.1_13" "Q4.1_14" "Q5.1_1"
## [61] "Q5.1_2" "Q5.1_3" "Q5.1_4"
## [64] "Q5.1_5" "Q5.1_6" "Q5.1_7"
## [67] "Q5.1_8" "Q5.1_9" "Q5.1_10"
## [70] "Q5.1_11" "Q5.1_12" "Q5.1_13"
## [73] "Q5.1_14" "Q5.1_15" "Q5.1_16"
## [76] "Q11.1_1" "Q11.1_2" "Q11.1_3"
## [79] "Q11.2_1" "Q11.2_2" "Q11.2_3"
## [82] "Q11.3_1" "Q11.3_2" "Q11.3_3"
## [85] "Q11.4_1" "Q11.4_2" "Q11.4_3"
## [88] "Q11.5_1" "Q11.5_2" "Q11.5_3"
## [91] "Q11.6_1" "Q11.6_2" "Q11.6_3"
## [94] "Q11.7_1" "Q11.7_2" "Q11.7_3"
## [97] "Q11.8_1" "Q11.8_2" "Q11.8_3"
## [100] "Q11.9_1" "Q11.9_2" "Q11.9_3"
## [103] "Q12.1_1" "Q12.1_2" "Q12.1_3"
## [106] "Q12.1_4" "Q12.1_5" "Q12.1_6"
## [109] "Q12.1_7" "Q12.1_8" "Q12.1_9"
## [112] "Q13.1_1" "Q13.1_2" "Q14.1_3"
## [115] "Q14.1_2" "Q14.1_1" "Q14.1_4"
## [118] "Q14.2" "Q14.3_1" "Q14.4_4"
## [121] "Q14.4_5" "Q14.4_6" "Q14.5_4"
## [124] "Q14.5_5" "Q14.5_3" "Q14.6_4"
## [127] "Q14.6_6" "Q14.6_11" "Q14.6_12"
## [130] "Q15.1_4" "Q15.2" "Q16.2"
## [133] "Q16.2_6_TEXT" "Q70" "Q70_2_TEXT"
## [136] "Q16.4_4" "Q16.4_7" "Q16.4_8"
## [139] "Q16.4_9" "Q16.4_15" "Q16.4_16"
## [142] "Q16.4_17" "Q16.4_12" "Q16.5"
## [145] "Q17.1" "Q17.2" "Q17.3_4"
## [148] "Q17.3_5" "Q17.3_8" "Q18.1"
## [151] "Q18.2" "Q71" "Q71_3_TEXT"
## [154] "Q72_1" "Q72_2" "Q75"
## [157] "Q19.1" "Major" "Q19.2"
## [160] "Q19.3" "Q19.4_1" "Start.Year"
## [163] "Q19.4_2" "Start.Year.Month" "Q19.5"
## [166] "Q19.6_1" "Q19.6_2" "Q19.7"
## [169] "Q19.7_2_TEXT" "Q19.8" "Q19.9"
## [172] "Q19.10" "Q19.11" "Q19.12"
## [175] "Country" "Continent" "Q19.13"
## [178] "Q19.13_8_TEXT" "Q19.14" "Q19.15_1"
## [181] "Q19.15_2" "Q19.15_3" "Q19.16"
## [184] "Q19.16_7_TEXT" "Q19.17" "Q19.17_5_TEXT"
## [187] "Q19.18" "Q19.18_7_TEXT" "Q19.19"
## [190] "Q19.20" "Q19.21" "Required"
## [193] "NF" "PI" "Sp"
## [196] "CC" "RF" "MF"
## [199] "ResID_REC" "ResID_INT" "ResID_PC"
## [202] "SciID_REC" "SciID_INT" "SciID_PC"
## [205] "EngrID_REC" "EngrID_PC" "IBM_SciID"
## [208] "IBM_EngrID" "IBM_ResID" "Diff_Res"
## [211] "Diff_Diss" "Diff_Stu" "Advisor"
## [214] "Peer" "s_rec" "s_pc"
## [217] "s_int" "e_rec" "e_pc"
## [220] "e_int" "r_rec" "r_pc"
## [223] "r_int" "IDclass" "IDuncer"
## [226] "IDpp" "nfja" "pie"
## [229] "cc" "mf" "FTPclass"
## [232] "FTPuncer" "FTPpp" "scim"
## [235] "engm" "resm" "IBMclass"
## [238] "IBMuncer" "IBMpp" "int_construct"
## [241] "int_profile" "finalphase"
# Scale scores taken from `imp`, selected by column NAME rather than position
# so that a change in the CSV layout raises an error instead of silently
# picking different variables. Each entry maps source column -> short label
# used in the correlation output.
scale_labels <- c(
  NF = "ftp1_nf", PI = "ftp2_pi", Sp = "ftp3_sp",
  CC = "ftp4_cc", RF = "ftp5_rf", MF = "ftp6_mf",
  ResID_REC = "id1_res-rec", ResID_INT = "id2_res-int",
  ResID_PC = "id3_res-pc",
  SciID_REC = "id4_sci-rec", SciID_INT = "id5_sci-int",
  SciID_PC = "id6_sci-pc",
  EngrID_REC = "id7_eng_rec", EngrID_PC = "id9_eng_pc",
  IBM_SciID = "ibm1_sci", IBM_EngrID = "ibm2_eng", IBM_ResID = "ibm3_res",
  Diff_Res = "etc1_diff_res", Diff_Diss = "etc2_diff_diss",
  Diff_Stu = "etc3_diff_stu", Advisor = "etc4_adv", Peer = "etc5_peer"
)
d <- imp[, names(scale_labels), drop = FALSE]

# The dataset has no EngrID_INT column, so the engineering-interest score is
# rebuilt from the Q4.1 items (mean of items 8, 9 and 5; NA if any is missing).
# Literal prefix match: the "." in "Q4.1" must not act as a regex wildcard.
# NOTE(review): in the head(d) output below, id5_sci-int and id8_eng_int are
# identical in all six printed rows -- confirm that Q4.1 really holds the
# engineering items and that SciID_INT was not derived from the same items.
eid <- imp[, startsWith(names(imp), "Q4.1_"), drop = FALSE]
e_int <- (eid$Q4.1_8 + eid$Q4.1_9 + eid$Q4.1_5) / 3
d <- cbind(d, e_int)
colnames(d) <- c(unname(scale_labels), "id8_eng_int")

# Scores stored alongside the LPA results, again selected by name and
# prefixed with "lpa-".
lpa_cols <- c(
  "s_rec", "s_pc", "s_int", "e_rec", "e_pc", "e_int",
  "r_rec", "r_pc", "r_int",
  "nfja", "pie", "cc", "mf",
  "scim", "engm", "resm"
)
d2 <- imp[, lpa_cols, drop = FALSE]
colnames(d2) <- paste("lpa", colnames(d2), sep = "-")

# Standardised (z-scored) copy of `d`; names are re-applied afterwards because
# data.frame() would otherwise mangle the hyphens.
d3 <- data.frame(scale(d, center = TRUE, scale = TRUE))
colnames(d3) <- paste("std", colnames(d), sep = "-")

# Respondent identifiers, kept in the same row order as d, d2 and d3.
ids <- imp$R_IDs
# Preview the first six rows of the renamed scale scores.
head(d, n = 6L)
## ftp1_nf ftp2_pi ftp3_sp ftp4_cc ftp5_rf ftp6_mf id1_res-rec
## 1 3.666667 4.25 4.000000 3.000000 3.5 4.5 5.0
## 2 2.500000 3.75 4.666667 3.666667 3.0 4.0 3.0
## 3 3.500000 3.75 3.666667 4.000000 2.5 4.0 4.0
## 4 3.000000 3.75 3.666667 3.666667 4.5 4.5 3.2
## 5 3.666667 4.25 2.333333 4.666667 3.0 4.5 4.4
## 6 4.500000 4.00 1.333333 3.000000 4.0 5.0 4.8
## id2_res-int id3_res-pc id4_sci-rec id5_sci-int id6_sci-pc id7_eng_rec
## 1 4.25 5.000000 4.4 5.000000 5.0 5.0
## 2 4.00 3.333333 3.0 4.666667 4.2 3.0
## 3 4.75 4.333333 4.0 4.333333 4.2 3.8
## 4 3.75 4.000000 4.0 5.000000 3.8 5.0
## 5 5.00 4.000000 3.4 5.000000 4.2 4.6
## 6 5.00 5.000000 4.6 5.000000 5.0 5.0
## id9_eng_pc ibm1_sci ibm2_eng ibm3_res etc1_diff_res etc2_diff_diss
## 1 5.0 4.285714 4.000000 4.857143 3.000000 2.0
## 2 4.0 4.428571 3.857143 3.857143 2.333333 3.5
## 3 3.8 4.000000 4.142857 3.714286 5.000000 NA
## 4 5.0 4.714286 5.000000 4.285714 2.333333 1.5
## 5 4.4 4.285714 4.285714 4.571429 3.000000 2.0
## 6 5.0 4.142857 4.857143 4.571429 1.666667 2.5
## etc3_diff_stu etc4_adv etc5_peer id8_eng_int
## 1 1.0 4.875 4.75 5.000000
## 2 1.0 3.500 3.25 4.666667
## 3 5.0 NA 4.50 4.333333
## 4 1.5 4.875 4.25 5.000000
## 5 4.0 4.875 5.00 5.000000
## 6 1.0 4.500 3.25 5.000000
# Preview the first six rows of the "lpa-" prefixed columns.
head(d2, n = 6L)
## lpa-s_rec lpa-s_pc lpa-s_int lpa-e_rec lpa-e_pc lpa-e_int
## 1 0.73534227 0.9766860 0.7234524 0.9766642 0.80399264 0.7944945
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 -0.09305441 -0.1624052 -0.7698422 0.5792016 0.01549111 0.7944945
## 6 NA NA NA NA NA NA
## lpa-r_rec lpa-r_pc lpa-r_int lpa-nfja lpa-pie lpa-cc lpa-mf
## 1 0.8549564 0.3939927 -0.1537839 0.3233690 0.2934547 -0.4473132 0.2914708
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 0.2310767 -0.3092680 0.7896832 0.1774183 0.2934547 1.0894984 0.2914708
## 6 NA NA NA NA NA NA NA
## lpa-scim lpa-engm lpa-resm
## 1 1.1341062 -0.5960461 0.7856047
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 0.4642623 0.3569008 0.3078649
## 6 NA NA NA
# Preview the first six rows of the standardised ("std-") columns.
head(d3, n = 6L)
## std-ftp1_nf std-ftp2_pi std-ftp3_sp std-ftp4_cc std-ftp5_rf std-ftp6_mf
## 1 0.3784695 0.4985965 1.0632640 -0.6274465 0.1389721 0.8563885
## 2 -1.2710684 -0.1734333 1.7634232 0.1309718 -0.3595581 0.2778549
## 3 0.1428212 -0.1734333 0.7131845 0.5101810 -0.8580883 0.2778549
## 4 -0.5641236 -0.1734333 0.7131845 0.1309718 1.1360325 0.8563885
## 5 0.3784695 0.4985965 -0.6871339 1.2685994 -0.3595581 0.8563885
## 6 1.5567109 0.1625816 -1.7373726 -0.6274465 0.6375023 1.4349222
## std-id1_res-rec std-id2_res-int std-id3_res-pc std-id4_sci-rec
## 1 1.10274073 0.1420579 1.11747212 1.02345537
## 2 -0.92232184 -0.1229551 -0.84000455 -0.36668222
## 3 0.09020945 0.6720840 0.33448145 0.62627320
## 4 -0.71981558 -0.3879682 -0.05701388 0.62627320
## 5 0.49522196 0.9370971 -0.05701388 0.03049995
## 6 0.90023447 0.9370971 1.11747212 1.22204645
## std-id5_sci-int std-id6_sci-pc std-id7_eng_rec std-id9_eng_pc
## 1 0.86616824 1.205620124 1.1368468 0.9306750
## 2 0.46479554 0.007740463 -1.1012122 -0.4526968
## 3 0.06342284 0.007740463 -0.2059886 -0.7293711
## 4 0.86616824 -0.591199367 1.1368468 0.9306750
## 5 0.86616824 0.007740463 0.6892350 0.1006519
## 6 0.86616824 1.205620124 1.1368468 0.9306750
## std-ibm1_sci std-ibm2_eng std-ibm3_res std-etc1_diff_res
## 1 0.6667850 0.2918291 0.8892679 0.1038867
## 2 0.8164094 0.1263082 -0.3859723 -0.5894021
## 3 0.3675362 0.4573500 -0.5681494 2.1837530
## 4 1.1156583 1.4504753 0.1605593 -0.5894021
## 5 0.6667850 0.6228709 0.5249136 0.1038867
## 6 0.5171606 1.2849544 0.5249136 -1.2826909
## std-etc2_diff_diss std-etc3_diff_stu std-etc4_adv std-etc5_peer
## 1 -0.43082061 -1.0107159 0.9709375 1.0438450
## 2 0.99354445 -1.0107159 -0.5828872 -0.8407697
## 3 NA 2.6713132 NA 0.7297425
## 4 -0.90560896 -0.5504623 0.9709375 0.4156401
## 5 -0.43082061 1.7508059 0.9709375 1.3579474
## 6 0.04396774 -1.0107159 0.5471671 -0.8407697
## std-id8_eng_int
## 1 0.86616824
## 2 0.46479554
## 3 0.06342284
## 4 0.86616824
## 5 0.86616824
## 6 0.86616824
# run correlations (p-values Holm-adjusted for multiple testing)
library(psych)
d.corr <- corr.test(d, adjust = "holm")
d2.corr <- corr.test(d2, adjust = "holm")
d3.corr <- corr.test(d3, adjust = "holm")
# create corrplots
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.2
## corrplot 0.84 loaded

# corr.test() returns a single p matrix with the raw p-values below the
# diagonal and the adjusted ones above it. corrplot() reorders rows/columns
# (order = "alphabet") and then reads the lower triangle, so passing `$p`
# unchanged would mark significance from a mix of raw and adjusted values.
# Mirror the upper (adjusted) triangle so every cell holds the Holm p-value.
holm_p_matrix <- function(ct) {
  p <- ct$p
  p[lower.tri(p)] <- t(p)[lower.tri(p)]
  p
}

# Lower-triangle correlation plot with significance stars from `p`.
draw_corrplot <- function(r, p) {
  corrplot(r, type = "lower", number.cex = .6, tl.cex = .6,
           order = "alphabet", tl.srt = 45, tl.col = "black",
           p.mat = p, insig = "label_sig", pch.col = "white", pch.cex = 1)
}

rmat <- d.corr$r
pmat <- holm_p_matrix(d.corr)
draw_corrplot(rmat, pmat)

rmat <- d2.corr$r
pmat <- holm_p_matrix(d2.corr)
draw_corrplot(rmat, pmat)

rmat <- d3.corr$r
pmat <- holm_p_matrix(d3.corr)
draw_corrplot(rmat, pmat)

# view matrices
library(DT)
## Warning: package 'DT' was built under R version 3.6.3
# One interactive table per correlation set, coefficients rounded to two
# decimals; `mat` is overwritten each time and ends up holding the d3 matrix.
mat <- round(d.corr$r, 2)
datatable(data = mat)
mat <- round(d2.corr$r, 2)
datatable(data = mat)
mat <- round(d3.corr$r, 2)
datatable(data = mat)
# look @ institution change item
# Q19.8 is coded 1 = "No", 2 = "Yes"; any other value (including NA) becomes
# NA. A single vectorised lookup avoids mutating a numeric vector into a
# character one halfway through the recode.
inschg <- c("No", "Yes")[match(imp$Q19.8, c(1, 2))]
table(inschg, useNA = "always")
## inschg
## No Yes <NA>
## 1556 85 113
# Descriptive cross-tab against FTP class, with missing values shown.
table(inschg, imp$FTPclass, useNA = "always")
##
## inschg 1 2 3 4 5 <NA>
## No 159 498 195 134 63 507
## Yes 17 36 7 4 7 14
## <NA> 4 11 3 3 2 90
# The test itself uses complete cases only: keeping the <NA> row and column
# would treat missingness as a substantive category and let the NA/NA cell
# drive the statistic.
inschg_cs <- table(inschg, imp$FTPclass)
csout <- chisq.test(inschg_cs)
csout
csout$residuals
# NOTE: the output previously recorded here (X-squared = 129.57, df = 10,
# p-value < 2.2e-16, plus a 3 x 6 residual table) was produced from the table
# that included the <NA> row and column. Re-run to regenerate the output for
# the complete-case 2 x 5 table.
# class combinations from FTP LPA
## ----------------------------------------------------
## Combining Gaussian mixture components for clustering
## ----------------------------------------------------
##
## Mclust model name: EVI
## Number of components: 8
##
## Combining steps:
##
## Step | Classes combined at this step | Class labels after this step
## -------|-------------------------------|-----------------------------
## 0 | --- | 1 2 3 4 5 6 7 8
## 1 | 4 & 6 | 1 2 3 4 5 7 8
## 2 | 3 & 4 | 1 2 3 5 7 8
## 3 | 2 & 8 | 1 2 3 5 7
## 4 | 1 & 2 | 1 3 5 7
## 5 | 1 & 7 | 1 3 5
## 6 | 1 & 3 | 1 5
## 7 | 1 & 5 | 1
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Mean of each FTP indicator (columns 10-13 of d2) within each FTP class,
# reshaped to long form (one row per class x indicator) for plotting.
d4 <- cbind(d2[10:13], group = imp$FTPclass)
desc <- describeBy(d4, group = "group")

profile_vars <- colnames(d4)[1:4]
class_ids <- 1:5

# For every class keep only the "mean" column, dropping the row that
# describes the grouping variable itself (vars == 5).
class_means <- lapply(class_ids, function(k) {
  stats <- data.frame(desc[[as.character(k)]])
  stats[stats$vars != 5, "mean", drop = FALSE]
})
descall <- do.call(rbind, class_means)

# rbind() makes the row names unique ("lpa-nfja1", ...), so the indicator
# label must come from the original column names, not from rownames();
# otherwise the plot gets 20 fill levels instead of 4.
descall$group <- rep(paste0("g", class_ids), each = length(profile_vars))
descall$var <- rep(profile_vars, times = length(class_ids))
descall
## mean group var
## lpa-nfja -0.56700311 g1 lpa-nfja
## lpa-pie -1.13042390 g1 lpa-pie
## lpa-cc -0.20604769 g1 lpa-cc
## lpa-mf -0.89208756 g1 lpa-mf
## lpa-nfja1 -0.11366383 g2 lpa-nfja
## lpa-pie1 0.01595735 g2 lpa-pie
## lpa-cc1 0.16692814 g2 lpa-cc
## lpa-mf1 0.08018651 g2 lpa-mf
## lpa-nfja2 -0.01588458 g3 lpa-nfja
## lpa-pie2 0.26051101 g3 lpa-pie
## lpa-cc2 -1.03517633 g3 lpa-cc
## lpa-mf2 0.33391197 g3 lpa-mf
## lpa-nfja3 0.85263197 g4 lpa-nfja
## lpa-pie3 0.80752467 g4 lpa-pie
## lpa-cc3 0.79225496 g4 lpa-cc
## lpa-mf3 0.74754569 g4 lpa-mf
## lpa-nfja4 0.65336918 g5 lpa-nfja
## lpa-pie4 0.38213628 g5 lpa-pie
## lpa-cc4 0.64744369 g5 lpa-cc
## lpa-mf4 -0.79141364 g5 lpa-mf
# plot: bars grouped by class, one bar per `var`, heights are the class means
profile_aes <- aes(x = group, y = mean, fill = var)
ggplot(descall, mapping = profile_aes) +
  geom_bar(stat = "identity", position = "dodge")
