#Load packages and dataset
# Global knitr chunk defaults used for every chunk of the rendered report.
knitr::opts_chunk$set(
  # Show the code, but keep messages and warnings out of the output.
  echo = TRUE, message = FALSE, warning = FALSE,
  # Write figures as 300-dpi PNG files (8 x 6) under plots/.
  fig.path = "plots/", dev = "png", dpi = 300,
  fig.width = 8, fig.height = 6
)
# Load libraries quietly
suppressPackageStartupMessages({
# load packages
library(readxl) #import data
library(here) #import data
library(summarytools)
library(psych)
library(ggplot2)
library("writexl")
library("apaTables")
library(tidyLPA)
library(dplyr)
library(tidyverse)
library(mclust)
library(ltm)
library(tidyr)
library(MASS)
library(lavaan)
library(lavaanPlot)
library(semTools)
library(effectsize)
library(MixAll)
library(tidyLPA)
library(nnet)
library(stats)
library(tidyverse)
library("DescTools")
library(GGally)
library(flextable)
})
# Restrict the sample before any analyses: keep respondents whose SEX is
# "Female" or "Male", and drop the two colleges and the two race categories
# listed below.
# NOTE(review): the original heading also said "Age less than 25", but no AGE
# condition is applied in this step -- confirm whether one is intended.
filtered_df <- data %>%
  dplyr::filter(
    SEX %in% c("Female", "Male"),
    !(COLLEGENAME %in% c("Merchandising, Hospitality, and Tourism", "Information")),
    !(RACE %in% c("American Indian/Alaskan Native", "Native Hawaiian Pacific Islander"))
  ) %>%
  # Drop the factor levels that no longer occur so they do not show up as
  # empty categories in later tables.
  dplyr::mutate(
    COLLEGENAME = droplevels(COLLEGENAME),
    RACE = droplevels(RACE)
  )
# Descriptive statistics for the filtered sample. Lines starting with "##"
# are the console output captured when the report was rendered.
# Age: frequency of each age, then mean and standard deviation.
table(filtered_df$AGE)
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
## 5 185 273 228 213 123 90 58 50 35 39 23 30 21 26 16 11 10 13 7
## 38 39 40 41 42 43 44 45 46 47 48 49
## 11 6 3 5 6 4 7 3 5 8 4 42
mean(filtered_df$AGE)
## [1] 24.38718
sd(filtered_df$AGE)
## [1] 6.824481
# Sex: counts, then proportions.
table(filtered_df$SEX)
##
## Female Male
## 1028 532
prop.table(table(filtered_df$SEX))
##
## Female Male
## 0.6589744 0.3410256
# NOTE(review): this converts SEX in the unfiltered `data`, not in
# `filtered_df`, so it has no effect on the tables in this section -- confirm
# whether `filtered_df` was intended (same for RACE and COLLEGENAME below).
data$SEX <-as.factor(data$SEX)
# Race: counts, then proportions.
table(filtered_df$RACE)
##
## Asian Black/African American Hispanic/Latinx
## 142 103 282
## Two or More Races White/Caucasian
## 138 895
data$RACE <-as.factor(data$RACE)
prop.table(table(filtered_df$RACE))
##
## Asian Black/African American Hispanic/Latinx
## 0.09102564 0.06602564 0.18076923
## Two or More Races White/Caucasian
## 0.08846154 0.57371795
# College: counts, then proportions.
table(filtered_df$COLLEGENAME)
##
## Business Education
## 161 228
## Engineering Health and Public Service
## 152 135
## Journalism Liberal Arts and Social Sciences
## 42 470
## Music Science
## 73 210
## Visual Arts and Design
## 89
data$COLLEGENAME <-as.factor(data$COLLEGENAME)
prop.table(table(filtered_df$COLLEGENAME))
##
## Business Education
## 0.10320513 0.14615385
## Engineering Health and Public Service
## 0.09743590 0.08653846
## Journalism Liberal Arts and Social Sciences
## 0.02692308 0.30128205
## Music Science
## 0.04679487 0.13461538
## Visual Arts and Design
## 0.05705128
#install.packages(c("flextable", "officer", "dplyr"))
# Demographic summary: build one table for age and one for the categorical
# demographics, then export both to a Word document.
library(dplyr)
library(flextable)
library(officer)

# Tabulate one categorical variable.
#
# values: vector (factor, character, ...) to tabulate; NA values are ignored,
#         as with table().
# label:  text written to the Category column of every row.
#
# Returns a data frame with columns Category, Variable (factor), Count
# (integer) and Proportion (share of non-missing values, rounded to 3 decimals).
summarise_category <- function(values, label) {
  counts <- table(values)
  data.frame(
    Category = rep(label, length(counts)),
    Variable = factor(names(counts), levels = names(counts)),
    Count = as.vector(counts),
    Proportion = round(as.vector(prop.table(counts)), 3)
  )
}

# AGE summary. The columns keep the shared Count/Proportion names so the data
# frame has the same layout as the categorical summaries, but they hold the
# mean and the SD; the flextable below relabels them accordingly.
age_summary <- data.frame(
  Category = "Age",
  Variable = "Mean (SD)",
  Count = round(mean(filtered_df$AGE, na.rm = TRUE), 2),
  Proportion = round(sd(filtered_df$AGE, na.rm = TRUE), 2)
)

# One summary per categorical demographic variable.
sex_summary <- summarise_category(filtered_df$SEX, "Sex")
race_summary <- summarise_category(filtered_df$RACE, "Race")
honors_summary <- summarise_category(filtered_df$UNTHONOR, "Honors College")
college_summary <- summarise_category(filtered_df$COLLEGENAME, "College")

# Combine all
demo_summary <- bind_rows(sex_summary, race_summary, honors_summary, college_summary)

# Format proportion as percentages
demographics <- demo_summary %>%
  mutate(Proportion = paste0(round(Proportion * 100, 1), "%"))

# Create flextables. The age table gets header labels that match its content
# (mean and SD rather than count and proportion).
age_ft <- flextable(age_summary) %>%
  set_header_labels(Count = "Mean", Proportion = "SD")
demo_ft <- flextable(demographics)

# Create and export Word document. The age table was previously built but never
# written; it is now included. The empty paragraph keeps the two tables from
# being joined into one in Word.
doc <- read_docx() %>%
  body_add_par("Demographic Summary", style = "heading 1") %>%
  body_add_flextable(age_ft) %>%
  body_add_par("") %>%
  body_add_flextable(demo_ft)
print(doc, target = "Demographic_Summary.docx")
# Personality Scale: this section examines the reliability and outliers for
# each of the Big Five traits.

# Items to reverse code: P1-P5, P16-P20, P26-P30, P36-P40 and P46-P50.
reverse_cols <- paste0("P", c(1:5, 16:20, 26:30, 36:40, 46:50))

# Reverse code by subtracting each response from 6
# (presumably a 1-5 response scale -- TODO confirm).
filtered_df[, reverse_cols] <- 6 - filtered_df[, reverse_cols]
filtered_df <- as_tibble(filtered_df)

# One item set (10 items each) per trait, used for the reliability estimates.
IPIP_Ext <- dplyr::select(filtered_df, dplyr::all_of(paste0("P", 11:20)))
IPIP_Open <- dplyr::select(filtered_df, dplyr::all_of(paste0("P", 21:30)))
IPIP_Neu <- dplyr::select(filtered_df, dplyr::all_of(paste0("P", 1:10)))
IPIP_Agree <- dplyr::select(filtered_df, dplyr::all_of(paste0("P", 31:40)))
IPIP_Cons <- dplyr::select(filtered_df, dplyr::all_of(paste0("P", 41:50)))
# Standardized Cronbach's alpha with a bootstrap 95% CI for each trait's item
# set. Lines starting with "##" are the console output captured when the
# report was rendered.
# NOTE(review): the CIs are bootstrapped, and no set.seed() call is visible
# before this point in the file, so the interval bounds may differ slightly
# between runs -- confirm whether a seed is set earlier.
cronbach.alpha(IPIP_Ext, CI=TRUE, standardized=TRUE)
##
## Standardized Cronbach's alpha for the 'IPIP_Ext' data-set
##
## Items: 10
## Sample units: 1560
## alpha: 0.895
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.887 0.903
cronbach.alpha(IPIP_Open, CI=TRUE, standardized=TRUE)
##
## Standardized Cronbach's alpha for the 'IPIP_Open' data-set
##
## Items: 10
## Sample units: 1560
## alpha: 0.795
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.776 0.811
cronbach.alpha(IPIP_Neu, CI=TRUE, standardized=TRUE)
##
## Standardized Cronbach's alpha for the 'IPIP_Neu' data-set
##
## Items: 10
## Sample units: 1560
## alpha: 0.872
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.861 0.880
cronbach.alpha(IPIP_Agree, CI=TRUE, standardized=TRUE)
##
## Standardized Cronbach's alpha for the 'IPIP_Agree' data-set
##
## Items: 10
## Sample units: 1560
## alpha: 0.802
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.787 0.818
cronbach.alpha(IPIP_Cons, CI=TRUE, standardized=TRUE)
##
## Standardized Cronbach's alpha for the 'IPIP_Cons' data-set
##
## Items: 10
## Sample units: 1560
## alpha: 0.864
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.853 0.875
# Composite score per trait: the row-wise mean of that trait's 10 items.
# With na.rm = TRUE the mean is taken over the items a respondent answered.
# NOTE(review): the P1-P10 composite is named EmotStab while the matching item
# set above is named IPIP_Neu -- confirm the intended scoring direction.
filtered_df$Extra <- rowMeans(filtered_df[, paste0("P", 11:20)], na.rm = TRUE)
filtered_df$Open <- rowMeans(filtered_df[, paste0("P", 21:30)], na.rm = TRUE)
filtered_df$EmotStab <- rowMeans(filtered_df[, paste0("P", 1:10)], na.rm = TRUE)
filtered_df$Agree <- rowMeans(filtered_df[, paste0("P", 31:40)], na.rm = TRUE)
filtered_df$Conscientious <- rowMeans(filtered_df[, paste0("P", 41:50)], na.rm = TRUE)
#Imagination Scale This section examines the reliability and outliers for each of the factors of the Imagination scale from Chang et al. (Initiating Ideas, Conceiving Ideas, Transforming Ideas)
## lavaan 0.6-19 ended normally after 45 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 57
##
## Number of observations 1560
##
## Model Test User Model:
##
## Test statistic 1487.062
## Degrees of freedom 243
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 18640.162
## Degrees of freedom 276
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.932
## Tucker-Lewis Index (TLI) 0.923
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -47648.225
## Loglikelihood unrestricted model (H1) -46904.694
##
## Akaike (AIC) 95410.450
## Bayesian (BIC) 95715.539
## Sample-size adjusted Bayesian (SABIC) 95534.463
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.057
## 90 Percent confidence interval - lower 0.055
## 90 Percent confidence interval - upper 0.060
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.041
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## initiating =~
## ICS1 1.000
## ICS2 0.821 0.033 24.885 0.000
## ICS3 1.158 0.043 27.226 0.000
## ICS4 1.228 0.041 29.630 0.000
## ICS5 1.081 0.039 27.405 0.000
## ICS6 1.143 0.041 27.573 0.000
## ICS7 1.058 0.045 23.586 0.000
## ICS9 0.896 0.041 21.990 0.000
## conceiving =~
## ICS11 1.000
## ICS12 1.007 0.043 23.399 0.000
## ICS14 0.948 0.055 17.139 0.000
## ICS15 0.926 0.054 17.235 0.000
## ICS16 0.843 0.043 19.464 0.000
## ICS18 0.884 0.048 18.505 0.000
## ICS19 1.067 0.049 21.663 0.000
## ICS20 1.025 0.051 20.184 0.000
## ICS21 1.125 0.049 22.777 0.000
## transforming =~
## ICS23 1.000
## ICS24 1.027 0.038 27.075 0.000
## ICS25 1.087 0.048 22.852 0.000
## ICS26 0.983 0.042 23.149 0.000
## ICS27 0.992 0.047 21.044 0.000
## ICS28 1.105 0.046 24.031 0.000
## ICS29 1.051 0.041 25.363 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## initiating ~~
## conceiving 0.399 0.025 15.931 0.000
## transforming 0.411 0.025 16.670 0.000
## conceiving ~~
## transforming 0.424 0.026 16.022 0.000
## .ICS5 ~~
## .ICS6 0.138 0.017 8.105 0.000
## .ICS11 ~~
## .ICS12 0.264 0.024 10.856 0.000
## .ICS14 ~~
## .ICS15 0.448 0.032 14.055 0.000
## .ICS23 ~~
## .ICS24 0.151 0.017 8.796 0.000
## .ICS24 ~~
## .ICS25 0.158 0.018 8.931 0.000
## .ICS28 ~~
## .ICS29 0.126 0.015 8.445 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .ICS1 0.591 0.024 25.132 0.000
## .ICS2 0.458 0.018 25.498 0.000
## .ICS3 0.623 0.026 24.359 0.000
## .ICS4 0.441 0.020 22.231 0.000
## .ICS5 0.516 0.022 23.843 0.000
## .ICS6 0.560 0.024 23.739 0.000
## .ICS7 0.925 0.036 25.934 0.000
## .ICS9 0.838 0.032 26.352 0.000
## .ICS11 0.859 0.033 26.050 0.000
## .ICS12 0.825 0.032 25.951 0.000
## .ICS14 1.141 0.043 26.686 0.000
## .ICS15 1.068 0.040 26.663 0.000
## .ICS16 0.559 0.022 25.952 0.000
## .ICS18 0.754 0.029 26.322 0.000
## .ICS19 0.525 0.021 24.530 0.000
## .ICS20 0.702 0.027 25.599 0.000
## .ICS21 0.418 0.018 23.136 0.000
## .ICS23 0.677 0.027 25.318 0.000
## .ICS24 0.546 0.022 24.551 0.000
## .ICS25 0.689 0.028 24.889 0.000
## .ICS26 0.534 0.021 24.851 0.000
## .ICS27 0.808 0.031 25.870 0.000
## .ICS28 0.545 0.023 23.572 0.000
## .ICS29 0.365 0.016 22.294 0.000
## initiating 0.568 0.037 15.292 0.000
## conceiving 0.457 0.038 12.141 0.000
## transforming 0.488 0.036 13.649 0.000
## lavaan 0.6-19 ended normally after 45 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 57
##
## Number of observations 1560
##
## Model Test User Model:
##
## Test statistic 1487.062
## Degrees of freedom 243
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 18640.162
## Degrees of freedom 276
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.932
## Tucker-Lewis Index (TLI) 0.923
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -47648.225
## Loglikelihood unrestricted model (H1) -46904.694
##
## Akaike (AIC) 95410.450
## Bayesian (BIC) 95715.539
## Sample-size adjusted Bayesian (SABIC) 95534.463
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.057
## 90 Percent confidence interval - lower 0.055
## 90 Percent confidence interval - upper 0.060
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.041
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## initiating =~
## ICS1 1.000
## ICS2 0.821 0.033 24.885 0.000
## ICS3 1.158 0.043 27.226 0.000
## ICS4 1.228 0.041 29.630 0.000
## ICS5 1.081 0.039 27.405 0.000
## ICS6 1.143 0.041 27.573 0.000
## ICS7 1.058 0.045 23.586 0.000
## ICS9 0.896 0.041 21.990 0.000
## conceiving =~
## ICS11 1.000
## ICS12 1.007 0.043 23.399 0.000
## ICS14 0.948 0.055 17.139 0.000
## ICS15 0.926 0.054 17.235 0.000
## ICS16 0.843 0.043 19.464 0.000
## ICS18 0.884 0.048 18.505 0.000
## ICS19 1.067 0.049 21.663 0.000
## ICS20 1.025 0.051 20.184 0.000
## ICS21 1.125 0.049 22.777 0.000
## transforming =~
## ICS23 1.000
## ICS24 1.027 0.038 27.075 0.000
## ICS25 1.087 0.048 22.852 0.000
## ICS26 0.983 0.042 23.149 0.000
## ICS27 0.992 0.047 21.044 0.000
## ICS28 1.105 0.046 24.031 0.000
## ICS29 1.051 0.041 25.363 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## initiating ~~
## conceiving 0.399 0.025 15.931 0.000
## transforming 0.411 0.025 16.670 0.000
## conceiving ~~
## transforming 0.424 0.026 16.022 0.000
## .ICS5 ~~
## .ICS6 0.138 0.017 8.105 0.000
## .ICS11 ~~
## .ICS12 0.264 0.024 10.856 0.000
## .ICS14 ~~
## .ICS15 0.448 0.032 14.055 0.000
## .ICS23 ~~
## .ICS24 0.151 0.017 8.796 0.000
## .ICS24 ~~
## .ICS25 0.158 0.018 8.931 0.000
## .ICS28 ~~
## .ICS29 0.126 0.015 8.445 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .ICS1 0.591 0.024 25.132 0.000
## .ICS2 0.458 0.018 25.498 0.000
## .ICS3 0.623 0.026 24.359 0.000
## .ICS4 0.441 0.020 22.231 0.000
## .ICS5 0.516 0.022 23.843 0.000
## .ICS6 0.560 0.024 23.739 0.000
## .ICS7 0.925 0.036 25.934 0.000
## .ICS9 0.838 0.032 26.352 0.000
## .ICS11 0.859 0.033 26.050 0.000
## .ICS12 0.825 0.032 25.951 0.000
## .ICS14 1.141 0.043 26.686 0.000
## .ICS15 1.068 0.040 26.663 0.000
## .ICS16 0.559 0.022 25.952 0.000
## .ICS18 0.754 0.029 26.322 0.000
## .ICS19 0.525 0.021 24.530 0.000
## .ICS20 0.702 0.027 25.599 0.000
## .ICS21 0.418 0.018 23.136 0.000
## .ICS23 0.677 0.027 25.318 0.000
## .ICS24 0.546 0.022 24.551 0.000
## .ICS25 0.689 0.028 24.889 0.000
## .ICS26 0.534 0.021 24.851 0.000
## .ICS27 0.808 0.031 25.870 0.000
## .ICS28 0.545 0.023 23.572 0.000
## .ICS29 0.365 0.016 22.294 0.000
## initiating 0.568 0.037 15.292 0.000
## conceiving 0.457 0.038 12.141 0.000
## transforming 0.488 0.036 13.649 0.000
## chisq df pvalue cfi tli
## 1487.062 243.000 0.000 0.932 0.923
## rmsea rmsea.ci.lower rmsea.ci.upper srmr
## 0.057 0.055 0.060 0.041
##
## Standardized Cronbach's alpha for the 'ICS_Init' data-set
##
## Items: 8
## Sample units: 1560
## alpha: 0.89
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.879 0.899
##
## Standardized Cronbach's alpha for the 'ICS_Conc' data-set
##
## Items: 9
## Sample units: 1560
## alpha: 0.851
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.838 0.864
##
## Standardized Cronbach's alpha for the 'ICS_Trans' data-set
##
## Items: 7
## Sample units: 1560
## alpha: 0.87
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.858 0.882
## < table of extent 0 >
## Multivariate
## FALSE TRUE
## 1549 11
# Latent Profile/Cluster Analysis (LPA) - continuous questions
# Research Question 1: What latent profiles of personality and imagination
# exist among university students? Do patterns exist among latent constructs
# of personality and imagination among a sample of college students?

# Fix the random seed before the steps below.
set.seed(2025)

# Take the three imagination scores and the five personality composites,
# impute missing values, standardize each indicator, and fit solutions with
# 1 to 5 profiles.
vLPA <- filtered_df %>%
  dplyr::select(dplyr::all_of(c(
    "Initiate", "Conceive", "Transform",
    "Extra", "Open", "EmotStab", "Agree", "Conscientious"
  ))) %>%
  single_imputation() %>%
  scale() %>%
  estimate_profiles(n_profiles = 1:5)

# Fit statistics for each solution ("##" lines are the captured output).
vLPA
## tidyLPA analysis using mclust:
##
## Model Classes AIC BIC Entropy prob_min prob_max n_min n_max BLRT_p
## 1 1 35440.70 35526.34 1.00 1.00 1.00 1.00 1.00
## 1 2 33497.77 33631.58 0.78 0.92 0.95 0.43 0.57 0.01
## 1 3 32839.51 33021.49 0.80 0.85 0.92 0.11 0.53 0.01
## 1 4 32555.28 32785.43 0.72 0.79 0.91 0.14 0.31 0.01
## 1 5 32169.11 32447.44 0.77 0.74 0.91 0.06 0.35 0.01

# Plot the estimated profiles for the fitted solutions.
plot_profiles(vLPA)