#load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# (these core packages are already attached by library(tidyverse) above;
# the explicit calls below are redundant but harmless)
library(readr)
library(forcats)
library(ggplot2)
library(lubridate)
library(purrr)
library(stringr)
library(tibble)
library(tidyr)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(mice)
## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(sjstats)
## 
## Attaching package: 'sjstats'
## 
## The following object is masked from 'package:psych':
## 
##     phi
library(formattable)
options(scipen = 999)
#import data
# set the working directory (an absolute, machine-specific path), then read
# the file with a path relative to it
setwd("/Users/melissalagunas/Desktop/Lab/DISSERTATION")
Dissertation_Main_Study_August_30 <- read_csv("Dissertation_Main_Study_August 30, 2024_08.47.csv")
## New names:
## Rows: 323 Columns: 90
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (90): StartDate, EndDate, Status, IPAddress, Progress, Duration (in seco...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `Q29` -> `Q29...18`
## • `Q1` -> `Q1...22`
## • `Q2` -> `Q2...23`
## • `Q1` -> `Q1...74`
## • `Q2` -> `Q2...75`
## • `Q29` -> `Q29...90`
# rename dataset 
dat <- Dissertation_Main_Study_August_30
#rename consent
dat <- dplyr::rename(dat, consent = Q29...18)
# assess how many individuals opened the survey link vs. granted consent;
# keep only consenting cases (consent == 4)
dat <- dplyr::filter(dat, consent == "4")
nrow(dat)
## [1] 319

The imported file contained 323 rows; of these, 319 respondents granted consent and were retained.
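To make the opened-versus-consented comparison explicit, both counts can be captured before filtering (a sketch; it would have to run before the filter above, using the pre-rename consent column Q29...18):

# sketch: counts before the consent filter
n_opened <- nrow(Dissertation_Main_Study_August_30)  # 323 rows imported
n_consented <- sum(Dissertation_Main_Study_August_30$Q29...18 == "4",
                   na.rm = TRUE)                     # consenting respondents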

# Identify duplicates based on key columns (open-ended response Q1...22 and IPAddress)
n_before <- nrow(dat)
duplicates <- dat[duplicated(dat[, c("Q1...22", "IPAddress")]), ]

# Print the number of duplicate rows
cat("Number of duplicate rows based on key columns: ", nrow(duplicates), "\n")
## Number of duplicate rows based on key columns:  7
# Remove duplicates based on the key columns, keeping only the first occurrence
dat <- dat[!duplicated(dat[, c("Q1...22", "IPAddress")]), ]

# Check the number of rows before and after removing duplicates
cat("Number of rows before removing duplicates: ", n_before, "\n")
## Number of rows before removing duplicates:  319
cat("Number of rows after removing duplicates: ", nrow(dat), "\n")
## Number of rows after removing duplicates:  312
# Inspect survey completion time after removing duplicates (still stored as
# character at this point, so summary() only reports the class; the column is
# converted to integer below)
summary(dat$`Duration (in seconds)`)
##    Length     Class      Mode 
##       312 character character
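Because the column is still character here, summary() reports only its class. A quick numeric peek requires coercion (a sketch; hablar::convert() does the real conversion below):

# quick numeric look via coercion (sketch); non-numeric entries become NA
summary(as.integer(dat$`Duration (in seconds)`))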
# drop administrative and identifying variables

dat2 <- select(dat, -c(StartDate, EndDate, Status, IPAddress, Finished, RecordedDate, ResponseId, RecipientLastName, RecipientFirstName, RecipientEmail, ExternalReference, LocationLatitude, LocationLongitude, DistributionChannel, UserLanguage))

# change variable names
dat2 <- dat2 %>%
  rename(
    US_located_age = Q35,
    PD = Q1...22,
    OC_1 = Q2...23,
    OC_2 = Q31_1,
    SS_1 = Q30_1, 
    SS_2 = Q30_2,
    SS_3 = Q30_3, 
    SS_4 = Q30_4, 
    SS_5 = Q30_5, 
    SS_6 = Q30_6, 
    SS_7 = Q30_7, 
    SS_8 = Q30_8, 
    SS_9 = Q30_9, 
    SS_10 = Q30_10, 
    SS_11 = Q30_11, 
    SS_12 = Q30_12,
    SS_13_TEXT = Q32,
    PSNQ_1 = Q27_1,
    PSNQ_2 = Q27_2,
    PSNQ_3 = Q27_3,
    PSNQ_4 = Q27_4,
    PSNQ_5 = Q27_5,
    PSNQ_6 = Q27_6,
    PSNQ_7 = Q27_7,
    PSNQ_8 = Q27_8,
    PSNQ_9 = Q27_9,
    PSNQ_10 = Q27_10,
    SBS_1 = Q4_1,
    SBS_2 = Q5_1,
    SBS_3 = Q6_1,
    SBS_4 = Q7_1,
    SBS_5 = Q8_1,
    SBS_6 = Q9_1,
    SBS_7 = Q10_1,
    PF_1 = Q11_1,  
    PF_2 = Q11_2,  
    PF_3 = Q11_3,  
    PF_4 = Q11_4,  
    PF_5 = Q11_5,  
    PF_6 = Q11_6,  
    PF_7 = Q11_7,  
    PF_8 = Q11_8, 
    CS_1 = Q26_1, 
    CS_2 = Q26_2, 
    CS_3 = Q26_3, 
    CS_4 = Q26_4, 
    CS_5 = Q26_5, 
    BRS_1 = Q12_1,
    BRS_2 = Q12_2,
    BRS_3 = Q12_3,
    BRS_4 = Q12_4,
    BRS_5 = Q12_5,
    BRS_6 = Q12_6,
    age = Q1...74,
    gender = Q2...75,
    gender_8_TEXT = Q2_8_TEXT,
    sexual_orientation = Q3,
    sexual_orientation_9_TEXT = Q3_9_TEXT,
    employment_status = Q13,
    employment_status_TEXT = Q13_3_TEXT,
    US_born = Q4,
    race = Q5, 
    race_10_TEXT = Q5_10_TEXT,
    income = Q6,
    fam_income = Q7,
    religion = Q8,
    religion_15_TEXT = Q8_15_TEXT,
    education = Q11,
    education_9_TEXT = Q11_9_TEXT,
    year_education = Q12)
# check structure
str(dat2)
## tibble [312 × 75] (S3: tbl_df/tbl/data.frame)
##  $ Progress                 : chr [1:312] "100" "100" "100" "100" ...
##  $ Duration (in seconds)    : chr [1:312] "803" "1034" "533" "1547" ...
##  $ consent                  : chr [1:312] "4" "4" "4" "4" ...
##  $ US_located_age           : chr [1:312] NA NA NA NA ...
##  $ employment_status        : chr [1:312] "1" "2" "1" "2" ...
##  $ employment_status_TEXT   : chr [1:312] NA NA NA NA ...
##  $ PD                       : chr [1:312] "Associate Attorney at large international law firm." "The Information Technology (IT) profession involves designing, developing, managing, and maintaining computer s"| __truncated__ "Health education" "Education - I work as a college advisor for high school students and support them throughout the college application process." ...
##  $ OC_1                     : chr [1:312] "3" "4" "3" "1" ...
##  $ OC_2                     : chr [1:312] "2" "3" "5" "2" ...
##  $ SS_1                     : chr [1:312] "6" "7" "1" "5" ...
##  $ SS_2                     : chr [1:312] "7" "7" "1" "5" ...
##  $ SS_3                     : chr [1:312] "6" "6" "1" "4" ...
##  $ SS_4                     : chr [1:312] "6" "5" "1" "4" ...
##  $ SS_5                     : chr [1:312] "7" "7" "1" "5" ...
##  $ SS_6                     : chr [1:312] "6" "5" "1" "7" ...
##  $ SS_7                     : chr [1:312] "6" "5" "1" "7" ...
##  $ SS_8                     : chr [1:312] "6" "6" "5" "4" ...
##  $ SS_9                     : chr [1:312] "6" "5" "5" "7" ...
##  $ SS_10                    : chr [1:312] "7" "7" "3" "6" ...
##  $ SS_11                    : chr [1:312] "7" "6" "5" "4" ...
##  $ SS_12                    : chr [1:312] "7" "5" "5" "7" ...
##  $ SS_13_TEXT               : chr [1:312] "Mother and Friend" "Uncle(Family Member)" "Friend" "A friend who also is a mother-figure to me." ...
##  $ PSNQ_1                   : chr [1:312] "6" "6" "6" "4" ...
##  $ PSNQ_2                   : chr [1:312] "5" "6" "6" "6" ...
##  $ PSNQ_3                   : chr [1:312] "5" "6" "7" "7" ...
##  $ PSNQ_4                   : chr [1:312] "5" "6" "7" "6" ...
##  $ PSNQ_5                   : chr [1:312] "4" "7" "7" "7" ...
##  $ PSNQ_6                   : chr [1:312] "3" "7" "7" "7" ...
##  $ PSNQ_7                   : chr [1:312] "4" "7" "6" "7" ...
##  $ PSNQ_8                   : chr [1:312] "5" "7" "6" "7" ...
##  $ PSNQ_9                   : chr [1:312] "3" "7" "6" "5" ...
##  $ PSNQ_10                  : chr [1:312] "3" "7" "6" "6" ...
##  $ SBS_1                    : chr [1:312] "2" "5" "5" "2" ...
##  $ SBS_2                    : chr [1:312] "2" "4" "5" "2" ...
##  $ SBS_3                    : chr [1:312] "3" "4" "5" "5" ...
##  $ SBS_4                    : chr [1:312] "4" "5" "3" "5" ...
##  $ SBS_5                    : chr [1:312] "3" "5" "3" "5" ...
##  $ SBS_6                    : chr [1:312] "3" "4" "5" "3" ...
##  $ SBS_7                    : chr [1:312] "3" "5" "5" "5" ...
##  $ PF_1                     : chr [1:312] "5" "7" "6" "7" ...
##  $ PF_2                     : chr [1:312] "4" "7" "7" "4" ...
##  $ PF_3                     : chr [1:312] "5" "7" "7" "6" ...
##  $ PF_4                     : chr [1:312] "6" "7" "7" "4" ...
##  $ PF_5                     : chr [1:312] "6" "7" "7" "7" ...
##  $ PF_6                     : chr [1:312] "7" "7" "7" "5" ...
##  $ PF_7                     : chr [1:312] "6" "7" "7" "6" ...
##  $ PF_8                     : chr [1:312] "5" "7" "7" "6" ...
##  $ CS_1                     : chr [1:312] "4" "4" "5" "5" ...
##  $ CS_2                     : chr [1:312] "4" "5" "5" "5" ...
##  $ CS_3                     : chr [1:312] "5" "5" "4" "5" ...
##  $ CS_4                     : chr [1:312] "4" "5" "5" "5" ...
##  $ CS_5                     : chr [1:312] "5" "5" "5" "5" ...
##  $ BRS_1                    : chr [1:312] "3" "5" "5" "5" ...
##  $ BRS_2                    : chr [1:312] "1" "4" "5" "1" ...
##  $ BRS_3                    : chr [1:312] "4" "2" "5" "4" ...
##  $ BRS_4                    : chr [1:312] "4" "3" "5" "1" ...
##  $ BRS_5                    : chr [1:312] "3" "4" "5" "4" ...
##  $ BRS_6                    : chr [1:312] "1" "5" "5" "1" ...
##  $ age                      : chr [1:312] NA "21" "25" "29" ...
##  $ gender                   : chr [1:312] "1" "2" "2" "1" ...
##  $ gender_8_TEXT            : chr [1:312] NA NA NA NA ...
##  $ sexual_orientation       : chr [1:312] "5" "5" "5" "2" ...
##  $ sexual_orientation_9_TEXT: chr [1:312] NA NA NA NA ...
##  $ US_born                  : chr [1:312] "1" "1" "1" "1" ...
##  $ race                     : chr [1:312] "1" "2" "2" "1" ...
##  $ race_10_TEXT             : chr [1:312] NA NA NA NA ...
##  $ income                   : chr [1:312] "9" "1" "1" "2" ...
##  $ fam_income               : chr [1:312] "7" "2" "2" "2" ...
##  $ religion                 : chr [1:312] "4" "5" "4,5" "4" ...
##  $ religion_15_TEXT         : chr [1:312] NA NA NA NA ...
##  $ education                : chr [1:312] "6" "4" "4" "4" ...
##  $ education_9_TEXT         : chr [1:312] NA NA NA NA ...
##  $ year_education           : chr [1:312] "2017" "2022" "2022" "2022" ...
##  $ Q28                      : chr [1:312] NA NA NA NA ...
##  $ Q29...90                 : chr [1:312] NA NA NA NA ...
# Convert character columns that contain numeric responses to integers
library(hablar)
## 
## Attaching package: 'hablar'
## The following object is masked from 'package:mice':
## 
##     squeeze
## The following object is masked from 'package:forcats':
## 
##     fct
## The following object is masked from 'package:tibble':
## 
##     num
## The following object is masked from 'package:dplyr':
## 
##     na_if
dat2 <- dat2 %>% #convert dataframe to integer
  convert(int(Progress:employment_status, OC_1:SS_12, PSNQ_1:gender, sexual_orientation, US_born, race, income:religion, education))
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `age = (structure(function (..., .x = ..1, .y = ..2, . = ..1)
##   ...`.
## Caused by warning in `as_reliable_int()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
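The coercion warnings come from free-text entries in otherwise numeric columns. A hedged check, using the pre-rename age column Q1...74 that still exists in dat, shows which raw values failed:

# which raw age entries could not be coerced to integer? (sketch)
dat %>%
  dplyr::filter(!is.na(Q1...74), is.na(suppressWarnings(as.integer(Q1...74)))) %>%
  dplyr::pull(Q1...74)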
str(dat2)
## tibble [312 × 75] (S3: tbl_df/tbl/data.frame)
##  $ Progress                 : int [1:312] 100 100 100 100 100 100 100 100 100 100 ...
##  $ Duration (in seconds)    : int [1:312] 803 1034 533 1547 1352 800 281 866 495 617 ...
##  $ consent                  : int [1:312] 4 4 4 4 4 4 4 4 4 4 ...
##  $ US_located_age           : int [1:312] NA NA NA NA NA NA NA NA NA NA ...
##  $ employment_status        : int [1:312] 1 2 1 2 1 1 1 1 1 1 ...
##  $ employment_status_TEXT   : chr [1:312] NA NA NA NA ...
##  $ PD                       : chr [1:312] "Associate Attorney at large international law firm." "The Information Technology (IT) profession involves designing, developing, managing, and maintaining computer s"| __truncated__ "Health education" "Education - I work as a college advisor for high school students and support them throughout the college application process." ...
##  $ OC_1                     : int [1:312] 3 4 3 1 5 6 5 5 3 4 ...
##  $ OC_2                     : int [1:312] 2 3 5 2 1 4 4 3 3 4 ...
##  $ SS_1                     : int [1:312] 6 7 1 5 7 6 6 5 6 6 ...
##  $ SS_2                     : int [1:312] 7 7 1 5 7 6 6 6 7 6 ...
##  $ SS_3                     : int [1:312] 6 6 1 4 6 6 6 5 6 6 ...
##  $ SS_4                     : int [1:312] 6 5 1 4 7 6 6 4 6 6 ...
##  $ SS_5                     : int [1:312] 7 7 1 5 7 7 6 4 5 7 ...
##  $ SS_6                     : int [1:312] 6 5 1 7 5 6 6 5 6 6 ...
##  $ SS_7                     : int [1:312] 6 5 1 7 6 5 6 4 6 6 ...
##  $ SS_8                     : int [1:312] 6 6 5 4 5 6 6 5 6 6 ...
##  $ SS_9                     : int [1:312] 6 5 5 7 5 6 6 5 6 6 ...
##  $ SS_10                    : int [1:312] 7 7 3 6 7 6 6 5 6 6 ...
##  $ SS_11                    : int [1:312] 7 6 5 4 6 5 6 5 6 6 ...
##  $ SS_12                    : int [1:312] 7 5 5 7 5 6 6 5 6 6 ...
##  $ SS_13_TEXT               : chr [1:312] "Mother and Friend" "Uncle(Family Member)" "Friend" "A friend who also is a mother-figure to me." ...
##  $ PSNQ_1                   : int [1:312] 6 6 6 4 6 6 6 6 2 6 ...
##  $ PSNQ_2                   : int [1:312] 5 6 6 6 6 6 6 6 3 7 ...
##  $ PSNQ_3                   : int [1:312] 5 6 7 7 6 6 6 6 4 6 ...
##  $ PSNQ_4                   : int [1:312] 5 6 7 6 NA 6 6 6 3 6 ...
##  $ PSNQ_5                   : int [1:312] 4 7 7 7 6 6 6 5 2 6 ...
##  $ PSNQ_6                   : int [1:312] 3 7 7 7 6 6 6 5 1 7 ...
##  $ PSNQ_7                   : int [1:312] 4 7 6 7 6 5 6 6 2 6 ...
##  $ PSNQ_8                   : int [1:312] 5 7 6 7 5 5 6 6 5 6 ...
##  $ PSNQ_9                   : int [1:312] 3 7 6 5 3 6 6 6 2 7 ...
##  $ PSNQ_10                  : int [1:312] 3 7 6 6 3 5 6 6 2 6 ...
##  $ SBS_1                    : int [1:312] 2 5 5 2 4 4 4 4 2 4 ...
##  $ SBS_2                    : int [1:312] 2 4 5 2 3 3 4 4 2 4 ...
##  $ SBS_3                    : int [1:312] 3 4 5 5 4 4 4 3 2 4 ...
##  $ SBS_4                    : int [1:312] 4 5 3 5 4 4 4 4 2 4 ...
##  $ SBS_5                    : int [1:312] 3 5 3 5 4 4 4 3 2 5 ...
##  $ SBS_6                    : int [1:312] 3 4 5 3 4 4 4 4 2 4 ...
##  $ SBS_7                    : int [1:312] 3 5 5 5 4 5 4 4 1 4 ...
##  $ PF_1                     : int [1:312] 5 7 6 7 6 6 6 6 2 6 ...
##  $ PF_2                     : int [1:312] 4 7 7 4 6 6 6 6 2 6 ...
##  $ PF_3                     : int [1:312] 5 7 7 6 6 6 6 5 5 6 ...
##  $ PF_4                     : int [1:312] 6 7 7 4 4 6 6 6 4 7 ...
##  $ PF_5                     : int [1:312] 6 7 7 7 6 6 6 5 6 6 ...
##  $ PF_6                     : int [1:312] 7 7 7 5 6 6 6 6 6 6 ...
##  $ PF_7                     : int [1:312] 6 7 7 6 5 6 6 6 2 6 ...
##  $ PF_8                     : int [1:312] 5 7 7 6 6 6 6 6 4 6 ...
##  $ CS_1                     : int [1:312] 4 4 5 5 5 5 4 4 1 4 ...
##  $ CS_2                     : int [1:312] 4 5 5 5 5 5 5 3 3 4 ...
##  $ CS_3                     : int [1:312] 5 5 4 5 2 5 5 3 1 4 ...
##  $ CS_4                     : int [1:312] 4 5 5 5 5 5 5 4 2 4 ...
##  $ CS_5                     : int [1:312] 5 5 5 5 4 4 5 4 2 4 ...
##  $ BRS_1                    : int [1:312] 3 5 5 5 4 5 5 4 4 4 ...
##  $ BRS_2                    : int [1:312] 1 4 5 1 4 1 1 2 2 4 ...
##  $ BRS_3                    : int [1:312] 4 2 5 4 4 5 5 4 4 4 ...
##  $ BRS_4                    : int [1:312] 4 3 5 1 4 1 1 2 2 4 ...
##  $ BRS_5                    : int [1:312] 3 4 5 4 3 5 5 4 4 4 ...
##  $ BRS_6                    : int [1:312] 1 5 5 1 2 1 1 2 2 4 ...
##  $ age                      : int [1:312] NA 21 25 29 NA NA 40 27 60 40 ...
##  $ gender                   : int [1:312] 1 2 2 1 1 2 1 2 1 5 ...
##  $ gender_8_TEXT            : chr [1:312] NA NA NA NA ...
##  $ sexual_orientation       : int [1:312] 5 5 5 2 5 9 5 5 5 5 ...
##  $ sexual_orientation_9_TEXT: chr [1:312] NA NA NA NA ...
##  $ US_born                  : int [1:312] 1 1 1 1 1 1 1 1 1 1 ...
##  $ race                     : int [1:312] 1 2 2 1 1 6 6 9 1 5 ...
##  $ race_10_TEXT             : chr [1:312] NA NA NA NA ...
##  $ income                   : int [1:312] 9 1 1 2 8 5 5 9 8 2 ...
##  $ fam_income               : int [1:312] 7 2 2 2 8 3 3 6 3 2 ...
##  $ religion                 : int [1:312] 4 5 NA 4 10 5 5 10 4 4 ...
##  $ religion_15_TEXT         : chr [1:312] NA NA NA NA ...
##  $ education                : int [1:312] 6 4 4 4 6 5 1 3 6 3 ...
##  $ education_9_TEXT         : chr [1:312] NA NA NA NA ...
##  $ year_education           : chr [1:312] "2017" "2022" "2022" "2022" ...
##  $ Q28                      : chr [1:312] NA NA NA NA ...
##  $ Q29...90                 : chr [1:312] NA NA NA NA ...
# add a sequential participant ID
dat2$ID <- 1:nrow(dat2)

# View the dataset with the new ID variable
head(dat2)
# duration
# calculate mean, SD, and min/max of `Duration (in seconds)`
duration_stats <- psych::describe(dat2$`Duration (in seconds)`, na.rm = TRUE, ranges = TRUE)

# View the results
print(duration_stats)
##    vars   n   mean      sd median trimmed    mad min   max range skew kurtosis
## X1    1 312 924.68 1397.29    543   690.7 467.76   6 17152 17146 6.66    63.75
##       se
## X1 79.11

Mean: 924.68 seconds (about 15.4 minutes)
SD: 1397.29 seconds
Max: 17152 seconds (about 4.8 hours)
Min: 6 seconds
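Since the raw unit is seconds, a minutes version of the same summary is easier to read (a sketch over the now-integer column):

# completion time in minutes (sketch)
summary(dat2$`Duration (in seconds)` / 60)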

ANALYZE ITEM LEVEL DATA MISSINGNESS AND PATTERNS

dat2_missingness <- dplyr::select(dat2, employment_status, OC_1:SS_12, PSNQ_1:gender, sexual_orientation, US_born, race, income:religion, education)
# NOTE: free-text demographic fields are excluded
# analyze item-level data missingness and patterns
## cases should typically have >80% of their data present

# Calculate the number and proportion of item-level missingness
dat2_missingness$nmiss <- dat2_missingness %>%
    dplyr::select(employment_status:education) %>% # Select all variables between employment_status and education
    is.na %>% 
    rowSums

# Dynamically calculate the number of selected variables
num_vars <- dat2_missingness %>%
    dplyr::select(employment_status:education) %>%
    ncol()

# Calculate the proportion of missingness
dat2_missingness <- dat2_missingness %>%
  dplyr::mutate(prop_miss = (nmiss/num_vars)*100)
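The same proportion can be computed in one step with rowMeans() over the missingness indicator matrix (a base-R sketch):

# one-step equivalent (sketch): fraction of missing items per case, as a percentage
prop_miss_check <- rowMeans(is.na(dplyr::select(dat2_missingness,
                                                employment_status:education))) * 100
all.equal(unname(prop_miss_check), dat2_missingness$prop_miss)  # should be TRUE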

# Describe the missingness proportion
psych::describe(dat2_missingness$prop_miss)
##    vars   n  mean    sd median trimmed  mad min max range skew kurtosis   se
## X1    1 312 16.76 33.97   1.67    8.67 2.47   0 100   100 1.82     1.47 1.92

Across cases deemed eligible under the inclusion/exclusion criteria, item-level missingness ranged from 0% to 100% (M = 16.76%, Mdn = 1.67%).

dat2_missingness <- dplyr::filter(dat2_missingness, prop_miss <= 90)
print(dat2_missingness)
## # A tibble: 271 × 62
##    employment_status  OC_1  OC_2  SS_1  SS_2  SS_3  SS_4  SS_5  SS_6  SS_7  SS_8
##                <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
##  1                 1     3     2     6     7     6     6     7     6     6     6
##  2                 2     4     3     7     7     6     5     7     5     5     6
##  3                 1     3     5     1     1     1     1     1     1     1     5
##  4                 2     1     2     5     5     4     4     5     7     7     4
##  5                 1     5     1     7     7     6     7     7     5     6     5
##  6                 1     6     4     6     6     6     6     7     6     5     6
##  7                 1     5     4     6     6     6     6     6     6     6     6
##  8                 1     5     3     5     6     5     4     4     5     4     5
##  9                 1     3     3     6     7     6     6     5     6     6     6
## 10                 1     4     4     6     6     6     6     7     6     6     6
## # ℹ 261 more rows
## # ℹ 51 more variables: SS_9 <int>, SS_10 <int>, SS_11 <int>, SS_12 <int>,
## #   PSNQ_1 <int>, PSNQ_2 <int>, PSNQ_3 <int>, PSNQ_4 <int>, PSNQ_5 <int>,
## #   PSNQ_6 <int>, PSNQ_7 <int>, PSNQ_8 <int>, PSNQ_9 <int>, PSNQ_10 <int>,
## #   SBS_1 <int>, SBS_2 <int>, SBS_3 <int>, SBS_4 <int>, SBS_5 <int>,
## #   SBS_6 <int>, SBS_7 <int>, PF_1 <int>, PF_2 <int>, PF_3 <int>, PF_4 <int>,
## #   PF_5 <int>, PF_6 <int>, PF_7 <int>, PF_8 <int>, CS_1 <int>, CS_2 <int>, …

Filter the dataframe to retain only rows with no more than 90% missing data (prop_miss <= 90), dropping essentially empty cases. This leaves 271 valid cases.

# drop the nmiss and prop_miss helper variables so they do not contaminate later analyses
dat2_missingness <- dat2_missingness %>%
  dplyr::select(-c(nmiss, prop_miss))

# what proportion of cells is missing across the entire dataset?
formattable::percent(mean(is.na(dat2_missingness)))
## [1] 4.50%
# what proportion of cases (rows) is complete (no missing values)?
formattable::percent(mean(complete.cases(dat2_missingness)))
## [1] 47.60%

Among the retained cases, 4.50% of all cells were missing, while 47.60% of cases had no missing data at all.

ANALYZING ITEM-LEVEL MISSING DATA PATTERNS

missing_data_patterns <- mice::md.pattern(dat2_missingness, plot = TRUE, rotate.names = TRUE)

missing_data_patterns
##     SS_2 SS_4 SS_9 SS_12 OC_1 SS_1 SS_3 SS_6 SS_5 SS_7 SS_10 employment_status
## 129    1    1    1     1    1    1    1    1    1    1     1                 1
## 42     1    1    1     1    1    1    1    1    1    1     1                 1
## 11     1    1    1     1    1    1    1    1    1    1     1                 1
## 9      1    1    1     1    1    1    1    1    1    1     1                 1
## 4      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 6      1    1    1     1    1    1    1    1    1    1     1                 1
## 5      1    1    1     1    1    1    1    1    1    1     1                 1
## 3      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 4      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 2      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 6      1    1    1     1    1    1    1    1    1    1     1                 1
## 2      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 3      1    1    1     1    1    1    1    1    1    1     1                 1
## 2      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 1
## 1      1    1    1     1    1    1    1    1    1    1     1                 0
## 1      1    1    1     1    1    1    1    1    1    1     1                 0
## 1      1    1    1     1    1    1    1    1    1    1     1                 0
## 1      1    1    1     1    1    1    1    1    1    1     0                 1
## 1      1    1    1     1    1    1    1    1    1    1     0                 1
## 1      1    1    1     1    1    1    1    1    1    0     1                 1
## 2      1    1    1     1    1    1    1    1    0    1     1                 1
## 1      1    1    1     1    1    1    1    0    1    1     1                 1
## 1      1    1    1     1    1    1    0    1    1    0     1                 1
## 1      1    1    1     1    1    0    1    1    1    1     1                 1
## 1      1    1    1     1    0    1    1    1    1    1     1                 1
##        0    0    0     0    1    1    1    1    2    2     2                 3
##     OC_2 SS_8 SS_11 PSNQ_1 PSNQ_2 PSNQ_5 PSNQ_6 PSNQ_10 PSNQ_3 PSNQ_4 PSNQ_7
## 129    1    1     1      1      1      1      1       1      1      1      1
## 42     1    1     1      1      1      1      1       1      1      1      1
## 11     1    1     1      1      1      1      1       1      1      1      1
## 9      1    1     1      1      1      1      1       1      1      1      1
## 4      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 6      1    1     1      1      1      1      1       1      1      1      1
## 5      1    1     1      1      1      1      1       1      1      1      1
## 3      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 4      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 2      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 6      1    1     1      1      1      1      1       1      1      1      1
## 2      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      0      1
## 1      1    1     1      1      1      1      1       1      0      1      1
## 1      1    1     1      0      0      0      0       0      0      0      0
## 3      1    1     1      0      0      0      0       0      0      0      0
## 2      1    1     0      1      1      1      1       1      1      1      1
## 1      1    1     0      1      1      1      1       1      1      1      1
## 1      1    0     1      1      1      1      1       1      1      1      1
## 1      1    0     1      1      1      1      1       1      1      1      1
## 1      0    1     1      1      1      1      1       1      1      1      1
## 1      0    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      0    0     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      0      0      0      0       0      0      0      0
## 1      1    1     1      1      1      1      1       1      1      1      1
## 2      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      0
## 1      1    1     1      1      1      1      1       1      1      1      1
## 1      1    1     1      1      1      1      1       1      1      1      1
##        3    3     3      5      5      5      5       5      6      6      6
##     PSNQ_9 PSNQ_8 SBS_2 SBS_6 SBS_7 SBS_3 SBS_4 SBS_1 SBS_5 PF_1 PF_4 PF_5 PF_6
## 129      1      1     1     1     1     1     1     1     1    1    1    1    1
## 42       1      1     1     1     1     1     1     1     1    1    1    1    1
## 11       1      1     1     1     1     1     1     1     1    1    1    1    1
## 9        1      1     1     1     1     1     1     1     1    1    1    1    1
## 4        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 6        1      1     1     1     1     1     1     1     1    1    1    1    1
## 5        1      1     1     1     1     1     1     1     1    1    1    1    1
## 3        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 4        1      1     1     1     1     1     1     1     1    0    0    0    0
## 1        1      1     1     1     1     1     1     1     0    1    1    1    1
## 1        1      1     1     1     1     1     1     1     0    1    1    1    1
## 1        1      1     1     1     1     1     1     1     0    1    1    1    1
## 2        1      1     1     1     1     1     1     0     1    1    1    1    1
## 1        1      1     1     1     1     1     0     1     1    1    1    1    1
## 1        1      1     1     1     1     0     1     1     1    1    1    1    1
## 6        1      1     0     0     0     0     0     0     0    0    0    0    0
## 2        1      0     1     1     1     1     1     1     1    1    1    1    1
## 1        0      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     0    1    1    1    1
## 1        0      0     1     1     1     1     1     1     1    1    1    1    1
## 3        0      0     0     0     0     0     0     0     0    0    0    0    0
## 2        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        0      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      0     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    0
## 1        1      1     1     1     1     1     1     1     1    1    1    0    1
## 1        0      0     0     0     0     0     0     0     0    0    0    0    0
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 2        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
## 1        1      1     1     1     1     1     1     1     1    1    1    1    1
##          7      8    10    10    10    11    11    12    14   14   14   15   15
##     PF_8 CS_2 BRS_1 gender income fam_income PF_3 CS_1 BRS_2 BRS_5 BRS_6
## 129    1    1     1      1      1          1    1    1     1     1     1
## 42     1    1     1      1      1          1    1    1     1     1     1
## 11     1    1     1      1      1          1    1    1     1     1     1
## 9      1    1     1      1      1          1    1    1     1     1     1
## 4      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 6      1    1     1      1      1          1    1    1     1     1     1
## 5      1    1     1      1      1          1    1    1     1     1     1
## 3      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    0     1     1     1
## 1      1    1     1      1      1          1    0    1     1     1     1
## 1      1    1     0      0      0          0    1    1     0     0     0
## 1      1    0     1      1      1          1    1    0     1     1     1
## 1      0    1     1      1      1          1    1    1     1     1     1
## 4      0    0     0      0      0          0    0    0     0     0     0
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 2      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    0    1     1     1     1
## 6      0    0     0      0      0          0    0    0     0     0     0
## 2      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 3      0    0     0      0      0          0    0    0     0     0     0
## 2      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     0     0     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     0
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      0    0     0      0      0          0    0    0     0     0     0
## 1      1    1     1      1      1          1    1    1     1     1     1
## 2      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
## 1      1    1     1      1      1          1    1    1     1     1     1
##       15   15    15     15     15         15   16   16    16    16    16
##     sexual_orientation PF_7 CS_4 CS_5 BRS_3 PF_2 BRS_4 education CS_3 US_born
## 129                  1    1    1    1     1    1     1         1    1       1
## 42                   1    1    1    1     1    1     1         1    1       1
## 11                   1    1    1    1     1    1     1         1    1       1
## 9                    1    1    1    1     1    1     1         1    1       1
## 4                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 6                    1    1    1    1     1    1     1         1    1       1
## 5                    1    1    1    1     1    1     1         1    1       1
## 3                    1    1    1    1     1    1     1         1    1       0
## 1                    1    1    1    1     1    1     1         1    0       1
## 1                    1    1    1    1     1    1     1         0    1       1
## 1                    1    1    1    1     1    1     1         0    1       1
## 1                    1    1    1    1     1    1     1         0    1       0
## 1                    1    1    1    1     1    0     1         1    1       1
## 1                    1    1    1    1     1    0     1         1    1       1
## 1                    1    1    1    1     0    1     1         1    0       0
## 1                    1    1    1    0     1    1     1         1    1       1
## 1                    1    1    0    1     1    1     0         1    1       1
## 1                    1    0    1    1     1    1     1         1    1       1
## 1                    1    0    1    1     1    1     1         1    0       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    0    1    1    1     0    1     0         0    1       0
## 1                    1    1    0    0     1    1     1         1    0       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 4                    0    0    0    0     0    0     0         0    0       0
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    0     1    1     1         1    1       1
## 1                    0    1    1    1     1    1     0         1    1       1
## 2                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    0     1         1    1       1
## 6                    0    0    0    0     0    0     0         0    0       0
## 2                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    0     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    0    1     1    1     1         1    1       1
## 3                    0    0    0    0     0    0     0         0    0       0
## 2                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     0    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    0    1    1     1    1     0         1    0       1
## 1                    0    0    0    0     0    0     0         0    0       0
## 1                    1    1    1    1     1    1     1         1    1       1
## 2                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
## 1                    1    1    1    1     1    1     1         1    1       1
##                     16   17   17   17    17   18    18        18   19      20
##     race religion age    
## 129    1        1   1   0
## 42     1        1   0   1
## 11     1        0   1   1
## 9      1        0   0   2
## 4      0        1   1   1
## 1      0        1   0   2
## 6      0        0   1   2
## 5      0        0   0   3
## 3      1        1   1   1
## 1      1        1   1   1
## 1      1        1   1   1
## 1      1        1   0   2
## 1      1        1   0   3
## 1      1        1   1   1
## 1      1        1   0   2
## 1      1        1   0   4
## 1      1        1   1   1
## 1      1        1   0   3
## 1      1        1   0   2
## 1      1        1   0   3
## 1      1        1   1   1
## 1      1        1   1   1
## 1      0        0   0  15
## 1      1        1   1   5
## 1      1        1   0   2
## 4      0        0   0  28
## 1      1        1   0   2
## 1      0        1   0   4
## 1      1        1   1   3
## 2      1        1   1   1
## 1      1        1   1   1
## 1      1        1   1   3
## 6      0        0   0  35
## 2      1        1   1   1
## 1      1        1   1   2
## 1      1        1   0   2
## 1      1        1   1   2
## 1      1        1   1  11
## 3      0        0   0  45
## 2      1        1   1   1
## 1      1        1   0   4
## 1      1        1   1   1
## 1      0        1   1   2
## 1      1        1   1   1
## 1      1        1   1   2
## 1      1        1   1   1
## 1      1        1   1   3
## 1      1        0   0   7
## 1      1        0   0   7
## 1      0        0   0  46
## 1      1        1   1   1
## 2      1        1   1   1
## 1      1        1   1   1
## 1      1        0   1   4
## 1      1        1   1   1
## 1      1        0   1   2
##       33       50  86 732
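
The md.pattern() matrix is exhaustive but dense; a quicker per-item view of the same information is a sorted count of missing values per variable (a base-R sketch):

# missing-value count per item, most missing first (sketch)
sort(colSums(is.na(dat2_missingness)), decreasing = TRUE)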

RECODING and REVERSE SCORING

# BRS (5-point scale, range 1-5): reverse-score items 2, 4, and 6

dat2 <- dat2 %>%
  dplyr::mutate(
    BRS_2r = 6 - BRS_2,
    BRS_4r = 6 - BRS_4,
    BRS_6r = 6 - BRS_6
  )
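A quick sanity check on the reverse scoring (a sketch): each reversed item should correlate exactly -1 with its original.

# the diagonal of this 3 x 3 matrix should be exactly -1 (sketch)
cor(dat2[c('BRS_2', 'BRS_4', 'BRS_6')],
    dat2[c('BRS_2r', 'BRS_4r', 'BRS_6r')],
    use = "pairwise.complete.obs")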
# Aggregate items into average subscales (STANDARDIZED)

#social support
SS <- c('SS_1','SS_2','SS_3','SS_4','SS_5','SS_6','SS_7','SS_8','SS_9','SS_10','SS_11','SS_12')

#perceived support network quality
PSNQ <- c('PSNQ_1', 'PSNQ_2', 'PSNQ_3', 'PSNQ_4', 'PSNQ_5', 'PSNQ_6', 'PSNQ_7', 'PSNQ_8', 'PSNQ_9', 'PSNQ_10')

#resilience
BRS <- c('BRS_1', 'BRS_2r', 'BRS_3', 'BRS_4r', 'BRS_5', 'BRS_6r')

#sense of belonging
SBS <- c('SBS_1', 'SBS_2', 'SBS_3', 'SBS_4', 'SBS_5', 'SBS_6', 'SBS_7')

#professional flourishing
PF <- c('PF_1','PF_2','PF_3','PF_4','PF_5','PF_6','PF_7','PF_8')

#career satisfaction
CS <- c('CS_1', 'CS_2','CS_3','CS_4','CS_5')

#organizational climate
OC <- c('OC_1', 'OC_2')

# create subscale mean scores; at least 80% of a scale's items must be
# present for a mean to be computed
dat2$SS_AVG <- mean_n(dat2[,SS], .80)
dat2$PSNQ_AVG <- mean_n(dat2[,PSNQ], .80)
dat2$BRS_AVG <- mean_n(dat2[,BRS], .80)
dat2$SBS_AVG <- mean_n(dat2[,SBS], .80)
dat2$PF_AVG <- mean_n(dat2[,PF], .80)
dat2$CS_AVG <- mean_n(dat2[,CS], .80)
dat2$OC_AVG <- mean_n(dat2[,OC], .80)
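mean_n() returns a row mean only when at least the given proportion (.80) of a scale's items is non-missing. For readers without that helper, a minimal base-R equivalent (a sketch; mean_if_present is a hypothetical name, and mean_n() may round its result, so compare with tolerance):

# minimal base-R version of the 80%-present rule (sketch)
mean_if_present <- function(items, prop = .80) {
  m <- rowMeans(items, na.rm = TRUE)           # mean of available items
  m[rowMeans(!is.na(items)) < prop] <- NA      # NA when too few items present
  m
}
# e.g., mean_if_present(dat2[, SS]) should agree with dat2$SS_AVG up to rounding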
# Aggregate items into total sum scores (UNSTANDARDIZED)

# rowSums() has no "proportion present" argument: its second argument is
# na.rm, so passing .80 silently coerces to na.rm = TRUE and sums whatever
# items happen to be available. To keep the 80% rule, scale the subscale
# means (which already enforce it) by the number of items.
dat2$SS_total <- dat2$SS_AVG * length(SS)
dat2$PSNQ_total <- dat2$PSNQ_AVG * length(PSNQ)
dat2$BRS_total <- dat2$BRS_AVG * length(BRS)
dat2$SBS_total <- dat2$SBS_AVG * length(SBS)
dat2$PF_total <- dat2$PF_AVG * length(PF)
dat2$CS_total <- dat2$CS_AVG * length(CS)
dat2$OC_total <- dat2$OC_AVG * length(OC)

INTERNAL CONSISTENCY OF SCALES

#social support
psych::alpha(dat2[c('SS_1','SS_2','SS_3','SS_4','SS_5','SS_6','SS_7','SS_8','SS_9','SS_10','SS_11','SS_12')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("SS_1", "SS_2", "SS_3", "SS_4", "SS_5", 
##     "SS_6", "SS_7", "SS_8", "SS_9", "SS_10", "SS_11", "SS_12")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.92      0.92    0.94      0.49  11 0.0071  5.3 0.84     0.47
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt      0.9  0.92  0.93
## Duhachek   0.9  0.92  0.93
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SS_1       0.91      0.91    0.94      0.49  11   0.0075 0.018  0.48
## SS_2       0.91      0.91    0.94      0.48  10   0.0078 0.018  0.48
## SS_3       0.91      0.91    0.94      0.49  10   0.0078 0.018  0.48
## SS_4       0.91      0.91    0.94      0.49  11   0.0076 0.016  0.48
## SS_5       0.91      0.91    0.94      0.48  10   0.0080 0.019  0.46
## SS_6       0.91      0.91    0.94      0.48  10   0.0079 0.020  0.46
## SS_7       0.91      0.91    0.94      0.48  10   0.0079 0.019  0.46
## SS_8       0.91      0.92    0.94      0.50  11   0.0073 0.015  0.48
## SS_9       0.91      0.91    0.94      0.48  10   0.0078 0.018  0.46
## SS_10      0.91      0.91    0.94      0.48  10   0.0078 0.018  0.46
## SS_11      0.91      0.91    0.94      0.48  10   0.0080 0.020  0.45
## SS_12      0.91      0.91    0.94      0.49  10   0.0077 0.017  0.46
## 
##  Item statistics 
##         n raw.r std.r r.cor r.drop mean   sd
## SS_1  271  0.70  0.69  0.66   0.62  5.2 1.36
## SS_2  271  0.74  0.74  0.72   0.68  5.4 1.14
## SS_3  270  0.74  0.72  0.70   0.67  5.3 1.33
## SS_4  272  0.71  0.69  0.67   0.63  5.3 1.27
## SS_5  269  0.78  0.78  0.76   0.73  5.5 1.08
## SS_6  270  0.75  0.76  0.74   0.70  5.3 1.05
## SS_7  269  0.75  0.76  0.74   0.69  5.1 1.14
## SS_8  268  0.63  0.61  0.58   0.54  5.2 1.20
## SS_9  271  0.72  0.74  0.72   0.67  5.3 1.02
## SS_10 269  0.73  0.74  0.71   0.67  5.7 1.09
## SS_11 268  0.77  0.77  0.75   0.72  5.2 1.20
## SS_12 271  0.71  0.73  0.71   0.65  5.4 0.99
## 
## Non missing response frequency for each item
##          1    2    3    4    5    6    7 miss
## SS_1  0.01 0.05 0.04 0.13 0.39 0.20 0.19 0.13
## SS_2  0.01 0.00 0.05 0.08 0.38 0.29 0.19 0.13
## SS_3  0.01 0.03 0.06 0.14 0.26 0.34 0.17 0.13
## SS_4  0.01 0.02 0.06 0.12 0.32 0.32 0.15 0.13
## SS_5  0.00 0.00 0.04 0.09 0.37 0.28 0.22 0.14
## SS_6  0.00 0.00 0.05 0.11 0.38 0.33 0.11 0.13
## SS_7  0.01 0.02 0.04 0.15 0.45 0.20 0.13 0.14
## SS_8  0.01 0.03 0.05 0.10 0.40 0.27 0.14 0.14
## SS_9  0.00 0.01 0.02 0.15 0.42 0.24 0.15 0.13
## SS_10 0.00 0.01 0.04 0.04 0.34 0.32 0.25 0.14
## SS_11 0.00 0.03 0.05 0.14 0.40 0.22 0.16 0.14
## SS_12 0.00 0.01 0.01 0.12 0.44 0.26 0.16 0.13
#perceived support network quality
psych::alpha(dat2[c('PSNQ_1', 'PSNQ_2', 'PSNQ_3', 'PSNQ_4', 'PSNQ_5', 'PSNQ_6', 'PSNQ_7', 'PSNQ_8', 'PSNQ_9', 'PSNQ_10')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("PSNQ_1", "PSNQ_2", "PSNQ_3", "PSNQ_4", 
##     "PSNQ_5", "PSNQ_6", "PSNQ_7", "PSNQ_8", "PSNQ_9", "PSNQ_10")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.92      0.92    0.93      0.54  12 0.007  5.6 0.84     0.55
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt      0.9  0.92  0.93
## Duhachek   0.9  0.92  0.93
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r  S/N alpha se  var.r med.r
## PSNQ_1       0.91      0.92    0.92      0.55 10.9   0.0073 0.0120  0.56
## PSNQ_2       0.92      0.92    0.92      0.56 11.3   0.0070 0.0082  0.56
## PSNQ_3       0.91      0.91    0.93      0.53 10.3   0.0077 0.0151  0.56
## PSNQ_4       0.91      0.91    0.93      0.54 10.4   0.0077 0.0147  0.54
## PSNQ_5       0.91      0.91    0.92      0.52  9.8   0.0081 0.0131  0.54
## PSNQ_6       0.91      0.91    0.92      0.53 10.3   0.0078 0.0122  0.55
## PSNQ_7       0.91      0.91    0.92      0.53 10.0   0.0079 0.0130  0.55
## PSNQ_8       0.91      0.91    0.93      0.54 10.5   0.0076 0.0127  0.56
## PSNQ_9       0.91      0.91    0.92      0.53 10.2   0.0079 0.0131  0.55
## PSNQ_10      0.91      0.91    0.92      0.53 10.0   0.0080 0.0127  0.55
## 
##  Item statistics 
##           n raw.r std.r r.cor r.drop mean   sd
## PSNQ_1  266  0.71  0.71  0.68   0.63  5.5 1.15
## PSNQ_2  266  0.66  0.66  0.63   0.57  5.7 1.09
## PSNQ_3  265  0.77  0.77  0.74   0.71  5.6 1.04
## PSNQ_4  265  0.76  0.76  0.71   0.69  5.6 1.23
## PSNQ_5  266  0.83  0.83  0.81   0.78  5.5 1.04
## PSNQ_6  266  0.78  0.77  0.75   0.71  5.4 1.21
## PSNQ_7  265  0.80  0.81  0.79   0.75  5.7 0.97
## PSNQ_8  263  0.74  0.74  0.71   0.68  5.7 0.98
## PSNQ_9  264  0.79  0.78  0.76   0.72  5.4 1.26
## PSNQ_10 266  0.80  0.80  0.78   0.75  5.6 1.04
## 
## Non missing response frequency for each item
##            1    2    3    4    5    6    7 miss
## PSNQ_1  0.00 0.03 0.03 0.13 0.16 0.52 0.13 0.15
## PSNQ_2  0.00 0.02 0.03 0.05 0.27 0.41 0.22 0.15
## PSNQ_3  0.00 0.02 0.03 0.09 0.22 0.49 0.16 0.15
## PSNQ_4  0.00 0.06 0.01 0.06 0.19 0.48 0.20 0.15
## PSNQ_5  0.00 0.02 0.02 0.11 0.28 0.42 0.15 0.15
## PSNQ_6  0.01 0.02 0.06 0.07 0.26 0.43 0.15 0.15
## PSNQ_7  0.00 0.02 0.00 0.09 0.23 0.49 0.17 0.15
## PSNQ_8  0.01 0.00 0.02 0.07 0.22 0.50 0.18 0.16
## PSNQ_9  0.00 0.02 0.08 0.11 0.19 0.42 0.18 0.15
## PSNQ_10 0.00 0.01 0.04 0.09 0.18 0.53 0.15 0.15
#resilience
psych::alpha(dat2[c('BRS_1', 'BRS_2r', 'BRS_3', 'BRS_4r', 'BRS_5', 'BRS_6r')])
## Warning in psych::alpha(dat2[c("BRS_1", "BRS_2r", "BRS_3", "BRS_4r", "BRS_5", : Some items were negatively correlated with the first principal component and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( BRS_3 BRS_5 ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("BRS_1", "BRS_2r", "BRS_3", "BRS_4r", 
##     "BRS_5", "BRS_6r")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.65      0.64    0.76      0.23 1.7 0.031  3.3 0.64     0.11
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.59  0.65  0.71
## Duhachek  0.59  0.65  0.71
## 
##  Reliability if an item is dropped:
##        raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r  med.r
## BRS_1       0.62      0.58    0.71      0.22 1.4    0.033 0.144 -0.014
## BRS_2r      0.54      0.56    0.68      0.20 1.3    0.043 0.100  0.116
## BRS_3       0.68      0.65    0.75      0.27 1.8    0.027 0.119  0.116
## BRS_4r      0.56      0.57    0.68      0.21 1.3    0.042 0.088  0.070
## BRS_5       0.66      0.64    0.74      0.26 1.8    0.029 0.119  0.116
## BRS_6r      0.53      0.55    0.66      0.20 1.2    0.044 0.083  0.075
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## BRS_1  256  0.54  0.62  0.50   0.34  3.9 0.90
## BRS_2r 255  0.73  0.66  0.62   0.54  2.7 1.15
## BRS_3  254  0.40  0.47  0.33   0.15  3.6 0.98
## BRS_4r 253  0.72  0.64  0.61   0.50  2.9 1.22
## BRS_5  255  0.40  0.50  0.37   0.19  3.7 0.85
## BRS_6r 255  0.76  0.68  0.68   0.56  2.8 1.21
## 
## Non missing response frequency for each item
##           1    2    3    4    5 miss
## BRS_1  0.00 0.09 0.13 0.50 0.27 0.18
## BRS_2r 0.11 0.44 0.17 0.20 0.08 0.18
## BRS_3  0.02 0.13 0.26 0.41 0.18 0.19
## BRS_4r 0.14 0.29 0.21 0.26 0.10 0.19
## BRS_5  0.01 0.08 0.24 0.52 0.15 0.18
## BRS_6r 0.16 0.29 0.22 0.25 0.08 0.18
#sense of belonging
psych::alpha(dat2[c('SBS_1', 'SBS_2', 'SBS_3', 'SBS_4', 'SBS_5', 'SBS_6', 'SBS_7')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("SBS_1", "SBS_2", "SBS_3", "SBS_4", "SBS_5", 
##     "SBS_6", "SBS_7")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.88      0.88    0.88      0.51 7.2 0.011  3.7 0.68     0.49
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.85  0.88   0.9
## Duhachek  0.86  0.88   0.9
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## SBS_1      0.86      0.86    0.85      0.50 6.0    0.013 0.0054  0.49
## SBS_2      0.85      0.86    0.85      0.50 5.9    0.013 0.0061  0.51
## SBS_3      0.86      0.86    0.86      0.51 6.3    0.012 0.0092  0.49
## SBS_4      0.86      0.86    0.85      0.50 6.1    0.012 0.0087  0.47
## SBS_5      0.86      0.86    0.86      0.51 6.3    0.012 0.0073  0.49
## SBS_6      0.86      0.86    0.86      0.51 6.2    0.012 0.0075  0.51
## SBS_7      0.86      0.86    0.86      0.51 6.2    0.012 0.0100  0.47
## 
##  Item statistics 
##         n raw.r std.r r.cor r.drop mean   sd
## SBS_1 259  0.78  0.77  0.74   0.69  3.5 0.97
## SBS_2 261  0.79  0.78  0.75   0.70  3.4 0.94
## SBS_3 260  0.74  0.74  0.67   0.63  3.8 0.92
## SBS_4 260  0.76  0.77  0.72   0.67  3.9 0.81
## SBS_5 257  0.73  0.74  0.69   0.63  3.8 0.86
## SBS_6 261  0.76  0.75  0.70   0.65  3.6 0.92
## SBS_7 261  0.75  0.76  0.70   0.66  3.9 0.86
## 
## Non missing response frequency for each item
##          1    2    3    4    5 miss
## SBS_1 0.01 0.16 0.31 0.38 0.14 0.17
## SBS_2 0.01 0.16 0.34 0.36 0.12 0.16
## SBS_3 0.00 0.07 0.30 0.37 0.26 0.17
## SBS_4 0.01 0.02 0.29 0.45 0.23 0.17
## SBS_5 0.00 0.07 0.30 0.42 0.21 0.18
## SBS_6 0.01 0.08 0.35 0.36 0.19 0.16
## SBS_7 0.00 0.08 0.15 0.54 0.22 0.16
#professional flourishing
psych::alpha(dat2[c('PF_1','PF_2','PF_3','PF_4','PF_5','PF_6','PF_7','PF_8')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("PF_1", "PF_2", "PF_3", "PF_4", "PF_5", 
##     "PF_6", "PF_7", "PF_8")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.94      0.94    0.93      0.65  15 0.0052  5.8 0.9     0.65
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.93  0.94  0.95
## Duhachek  0.93  0.94  0.95
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## PF_1      0.92      0.92    0.92      0.63  12   0.0066 0.0037  0.62
## PF_2      0.93      0.93    0.92      0.64  13   0.0062 0.0044  0.64
## PF_3      0.93      0.93    0.93      0.67  14   0.0056 0.0032  0.67
## PF_4      0.93      0.93    0.92      0.65  13   0.0060 0.0053  0.64
## PF_5      0.93      0.93    0.93      0.66  14   0.0057 0.0041  0.66
## PF_6      0.93      0.93    0.93      0.66  13   0.0058 0.0041  0.64
## PF_7      0.92      0.92    0.92      0.64  12   0.0063 0.0040  0.64
## PF_8      0.93      0.93    0.93      0.66  14   0.0057 0.0049  0.67
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean   sd
## PF_1 257  0.90  0.89  0.89   0.86  5.6 1.24
## PF_2 253  0.87  0.86  0.84   0.82  5.6 1.17
## PF_3 255  0.79  0.78  0.74   0.72  5.8 1.04
## PF_4 257  0.84  0.84  0.81   0.78  5.8 1.09
## PF_5 256  0.79  0.79  0.76   0.73  5.9 0.94
## PF_6 256  0.81  0.81  0.78   0.75  5.9 1.01
## PF_7 254  0.87  0.88  0.86   0.83  5.8 1.14
## PF_8 256  0.80  0.81  0.77   0.74  5.8 0.93
## 
## Non missing response frequency for each item
##      1    2    3    4    5    6    7 miss
## PF_1 0 0.03 0.05 0.10 0.18 0.41 0.23 0.18
## PF_2 0 0.02 0.05 0.09 0.21 0.42 0.21 0.19
## PF_3 0 0.02 0.01 0.08 0.21 0.42 0.26 0.18
## PF_4 0 0.01 0.04 0.09 0.15 0.46 0.25 0.18
## PF_5 0 0.00 0.01 0.08 0.20 0.43 0.28 0.18
## PF_6 0 0.01 0.02 0.09 0.16 0.46 0.27 0.18
## PF_7 0 0.01 0.04 0.08 0.18 0.41 0.27 0.19
## PF_8 0 0.00 0.02 0.05 0.22 0.48 0.23 0.18
#career success
psych::alpha(dat2[c('CS_1', 'CS_2','CS_3','CS_4','CS_5')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("CS_1", "CS_2", "CS_3", "CS_4", "CS_5")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.85      0.85    0.83      0.54 5.8 0.013    4 0.72     0.54
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.85  0.88
## Duhachek  0.83  0.85  0.88
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## CS_1      0.85      0.85    0.81      0.58 5.5    0.014 0.0020  0.56
## CS_2      0.81      0.81    0.77      0.52 4.3    0.017 0.0058  0.52
## CS_3      0.81      0.81    0.77      0.52 4.3    0.017 0.0022  0.54
## CS_4      0.82      0.82    0.78      0.53 4.5    0.017 0.0055  0.54
## CS_5      0.83      0.83    0.79      0.55 4.8    0.016 0.0065  0.54
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean   sd
## CS_1 255  0.71  0.73  0.62   0.57  4.0 0.81
## CS_2 256  0.83  0.82  0.77   0.71  4.0 0.93
## CS_3 252  0.84  0.82  0.78   0.71  4.0 1.07
## CS_4 254  0.81  0.81  0.75   0.70  4.0 0.89
## CS_5 254  0.78  0.78  0.70   0.65  4.1 0.83
## 
## Non missing response frequency for each item
##         1    2    3    4    5 miss
## CS_1 0.01 0.05 0.08 0.60 0.26 0.18
## CS_2 0.00 0.09 0.13 0.42 0.36 0.18
## CS_3 0.03 0.10 0.09 0.41 0.37 0.19
## CS_4 0.02 0.04 0.17 0.47 0.31 0.19
## CS_5 0.00 0.06 0.13 0.50 0.31 0.19
#organizational climate
psych::alpha(dat2[c('OC_1', 'OC_2')])
## 
## Reliability analysis   
## Call: psych::alpha(x = dat2[c("OC_1", "OC_2")])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.54      0.54    0.37      0.37 1.2 0.051  3.6 0.96     0.37
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.42  0.54  0.63
## Duhachek  0.44  0.54  0.64
## 
##  Reliability if an item is dropped:
##      raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## OC_1      0.45      0.37    0.14      0.37 0.6       NA     0  0.37
## OC_2      0.31      0.37    0.14      0.37 0.6       NA     0  0.37
## 
##  Item statistics 
##        n raw.r std.r r.cor r.drop mean  sd
## OC_1 280  0.86  0.83  0.51   0.37  3.9 1.3
## OC_2 278  0.79  0.83  0.51   0.37  3.3 1.0
## 
## Non missing response frequency for each item
##         1    2    3    4    5    6 miss
## OC_1 0.07 0.00 0.26 0.40 0.12 0.14 0.10
## OC_2 0.06 0.15 0.28 0.41 0.10 0.00 0.11
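Because organizational climate is measured with only two items, coefficient alpha tends to understate its reliability; the Spearman-Brown coefficient, 2r/(1 + r), is generally preferred for two-item scales. A minimal sketch, assuming OC_1 and OC_2 are numeric at this point in the pipeline (r_oc and sb_oc are hypothetical names):

# Sketch: Spearman-Brown reliability for the two-item OC scale,
# computed from the inter-item correlation
r_oc <- cor(dat2$OC_1, dat2$OC_2, use = "pairwise.complete.obs")
sb_oc <- (2 * r_oc) / (1 + r_oc)
sb_oc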

MISSING ANALYSIS: SCALE LEVEL WITH STANDARDIZED (AVG) SCORES

dat2_missingness2 <- dplyr::select(dat2, OC_AVG, SS_AVG:CS_AVG)
# The script below counts the number of missing scale scores per case and converts that count to a proportion of the seven scale-level variables.

# Create a variable (n_miss) that counts the number of missing scale scores
dat2_missingness2$n_miss <- dat2_missingness2 %>%
  dplyr::select(OC_AVG, SS_AVG:CS_AVG) %>%
  is.na() %>%
  rowSums()

# Create a percent missing by dividing n_miss by the total number of scale scores (7)
# Pipe to sort in descending order of missingness
dat2_missingness2 <- dat2_missingness2 %>%
  dplyr::mutate(prop_miss = (n_miss / 7) * 100) %>%
  arrange(desc(n_miss))

psych::describe(dat2_missingness2$prop_miss)
##    vars   n  mean    sd median trimmed mad min max range skew kurtosis   se
## X1    1 312 15.62 33.85      0    7.09   0   0 100   100 1.85     1.61 1.92

Across the 312 cases to which the scoring protocol was applied, missingness ranged from 0% to 100%; at least one case was missing every scale score.
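To avoid hard-coding the denominator at all, the proportion can be computed directly with rowMeans(); a minimal sketch over the same seven columns (prop_miss2 is a hypothetical name):

# Sketch: rowMeans(is.na(...)) gives the percent of missing scale scores
# per case with no hard-coded variable count
dat2_missingness2$prop_miss2 <- 100 * rowMeans(
  is.na(dplyr::select(dat2_missingness2, OC_AVG, SS_AVG:CS_AVG)))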

# Filter out cases with more than 20% missing data
fgpdata_scored <- dplyr::filter(dat2_missingness2, prop_miss <= 20)

# Select only the relevant columns (if necessary)
fgpdata_scored <- dplyr::select(fgpdata_scored, OC_AVG, SS_AVG:CS_AVG)

# Save the cleaned data to a CSV file
#write.csv(fgpdata_scored, file = "fgpdata_average_scores.csv", row.names = FALSE)

# Optionally, check the number of cases retained
num_cases_retained <- nrow(fgpdata_scored)
print(num_cases_retained)
## [1] 257
# Add a sequential ID variable to the retained cases
fgpdata_scored$ID <- 1:nrow(fgpdata_scored)

# View the dataset with the new ID variable
head(fgpdata_scored)
## # A tibble: 6 × 8
##   OC_AVG SS_AVG PSNQ_AVG BRS_AVG SBS_AVG PF_AVG CS_AVG    ID
##    <dbl>  <dbl>    <dbl>   <dbl>   <dbl>  <dbl>  <dbl> <int>
## 1    3.5   4       NA       3.33    3.14   3.88   3.25     1
## 2    4     5.5      5.9     3.33    4.43   6     NA        2
## 3   NA     5.75     5.33    2.83    3.57   5.12   4.2      3
## 4    3     3.08     2.9    NA       2.57   3      2.4      4
## 5   NA     4        4.8     2       3.14   3.62   3.2      5
## 6   NA     4.82     5.6     3.6     3.86   6      4        6

After eliminating cases with more than 20% missing data, the analyzed dataset included 257 cases.

REVISITING MISSING ANALYSIS AT THE SCALE LEVEL

#percent missing across df
formattable::percent(mean(is.na(fgpdata_scored)))
## [1] 0.39%
#percent of rows with nonmissing data
formattable::percent(mean(complete.cases(fgpdata_scored))) 
## [1] 96.89%

In this dataset, missing data were minimal: only 0.39% of values were missing overall, and 96.89% of rows had complete data at the scale level.

mice_ScaleLvl_fgp <- mice::md.pattern(fgpdata_scored, plot = TRUE, rotate.names=TRUE)

mice_ScaleLvl_fgp 
##     SS_AVG SBS_AVG PF_AVG ID PSNQ_AVG CS_AVG BRS_AVG OC_AVG  
## 249      1       1      1  1        1      1       1      1 0
## 4        1       1      1  1        1      1       1      0 1
## 2        1       1      1  1        1      1       0      1 1
## 1        1       1      1  1        1      0       1      1 1
## 1        1       1      1  1        0      1       1      1 1
##          0       0      0  0        1      1       2      4 8

Reading the pattern matrix: 249 of the 257 cases are complete; 4 cases are missing only OC_AVG, 2 only BRS_AVG, and one case each is missing CS_AVG and PSNQ_AVG, for 8 missing values in total.

MISSING ANALYSIS: SCALE LEVEL WITH UNSTANDARDIZED (TOTAL) SCORES

dat2_missingness3 <- dplyr::select(dat2, OC_total, SS_total:CS_total)
# The script below counts the number of missing total scores per case and converts that count to a proportion of the seven scale-level variables.

# Create a variable (n_miss) that counts the number of missing total scores
dat2_missingness3$n_miss <- dat2_missingness3 %>%
  dplyr::select(OC_total, SS_total:CS_total) %>%
  is.na() %>%
  rowSums()

# Create a percent missing by dividing n_miss by the total number of scale scores (7)
# Pipe to sort in descending order of missingness
dat2_missingness3 <- dat2_missingness3 %>%
  dplyr::mutate(prop_miss = (n_miss / 7) * 100) %>%
  arrange(desc(n_miss))

psych::describe(dat2_missingness3$prop_miss)
##    vars   n mean sd median trimmed mad min max range skew kurtosis se
## X1    1 312    0  0      0       0   0   0   0     0  NaN      NaN  0

Across the 312 cases to which the scoring protocol was applied, no total scores were missing; missingness was 0% throughout.

# Filter out cases with more than 20% missing data
fgpdata_scored2 <- dplyr::filter(dat2_missingness3, prop_miss <= 20)

# Select only the relevant columns (if necessary)
fgpdata_scored2 <- dplyr::select(fgpdata_scored2, OC_total, SS_total:CS_total)

# Save the cleaned data to a CSV file
#write.csv(fgpdata_scored2, file = "fgpdata_total_scores.csv", row.names = FALSE)

# Optionally, check the number of cases retained
num_cases_retained2 <- nrow(fgpdata_scored2)
print(num_cases_retained2)
## [1] 312

After eliminating cases with more than 20% missing data, the dataset remained unchanged at 312 cases.

REVISITING MISSING ANALYSIS AT THE SCALE LEVEL

#percent missing across df
formattable::percent(mean(is.na(fgpdata_scored2)))
## [1] 0.00%
#percent of rows with nonmissing data
formattable::percent(mean(complete.cases(fgpdata_scored2))) 
## [1] 100.00%
mice_ScaleLvl_fgp2 <- mice::md.pattern(fgpdata_scored2, plot = TRUE, rotate.names=TRUE)
##  /\     /\
## {  `---'  }
## {  O   O  }
## ==>  V <==  No need for mice. This data set is completely observed.
##  \  \|/  /
##   `-----'

mice_ScaleLvl_fgp2 
##     OC_total SS_total PSNQ_total BRS_total SBS_total PF_total CS_total  
## 312        1        1          1         1         1        1        1 0
##            0        0          0         0         0        0        0 0

DATA DIAGNOSTICS

ANALYZE SCALE LEVEL ASSUMPTIONS WITH STANDARDIZED DATASET

fgpdata_Subscales <- dplyr::select(fgpdata_scored, OC_AVG, SS_AVG:CS_AVG)
#skew and kurtosis
descriptives <- psych::describe(fgpdata_Subscales, type = 1)
descriptives
##          vars   n mean   sd median trimmed  mad  min max range  skew kurtosis
## OC_AVG      1 253 3.67 0.94   4.00    3.72 0.74 1.00 5.5  4.50 -0.59     0.41
## SS_AVG      2 257 5.36 0.84   5.27    5.36 0.83 2.50 7.0  4.50 -0.16     0.07
## PSNQ_AVG    3 256 5.60 0.84   5.70    5.64 0.65 2.50 7.0  4.50 -0.76     0.99
## BRS_AVG     4 255 3.28 0.64   3.17    3.25 0.49 1.00 5.0  4.00  0.43     0.86
## SBS_AVG     5 257 3.71 0.67   3.57    3.69 0.64 1.86 5.0  3.14  0.14    -0.55
## PF_AVG      6 257 5.76 0.90   6.00    5.83 0.74 2.88 7.0  4.12 -0.79     0.32
## CS_AVG      7 256 4.02 0.72   4.20    4.08 0.59 1.40 5.0  3.60 -0.81     0.32
##            se
## OC_AVG   0.06
## SS_AVG   0.05
## PSNQ_AVG 0.05
## BRS_AVG  0.04
## SBS_AVG  0.04
## PF_AVG   0.06
## CS_AVG   0.05

All skewness values fall below the absolute cutoff of 3.0 and all kurtosis values fall below 10.0, per Kline (2016).
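To make this check explicit rather than visual, a minimal sketch that flags any scale exceeding those cutoffs (flagged is a hypothetical name; it should return zero rows given the values reported above):

# Sketch: flag scales with |skew| > 3 or kurtosis > 10 (Kline, 2016),
# using the descriptives object created above
flagged <- as.data.frame(descriptives) %>%
  tibble::rownames_to_column("scale") %>%
  dplyr::filter(abs(skew) > 3 | kurtosis > 10)
flagged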

#normality
shapiro.test(fgpdata_Subscales$SS_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$SS_AVG
## W = 0.98729, p-value = 0.0226
shapiro.test(fgpdata_Subscales$PSNQ_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$PSNQ_AVG
## W = 0.95562, p-value = 0.0000004639
shapiro.test(fgpdata_Subscales$BRS_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$BRS_AVG
## W = 0.94655, p-value = 0.00000004898
shapiro.test(fgpdata_Subscales$SBS_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$SBS_AVG
## W = 0.97696, p-value = 0.0003519
shapiro.test(fgpdata_Subscales$PF_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$PF_AVG
## W = 0.93998, p-value = 0.000000009607
shapiro.test(fgpdata_Subscales$CS_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$CS_AVG
## W = 0.93674, p-value = 0.000000004962
shapiro.test(fgpdata_Subscales$OC_AVG)
## 
##  Shapiro-Wilk normality test
## 
## data:  fgpdata_Subscales$OC_AVG
## W = 0.9488, p-value = 0.00000009337

When the p-value from the Shapiro-Wilk test is below .05, the variable's distribution deviates significantly from a normal distribution. By this criterion, none of the seven scale scores is normally distributed.
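The seven tests above can also be run in a single pass; a minimal sketch using purrr over the same columns (shapiro_summary is a hypothetical name):

# Sketch: apply shapiro.test() to each scale score and collect W and p-values
shapiro_summary <- fgpdata_Subscales %>%
  dplyr::select(OC_AVG, SS_AVG:CS_AVG) %>%
  purrr::map(~ shapiro.test(na.omit(.x))) %>%
  purrr::map_df(~ tibble::tibble(W = .x$statistic, p = .x$p.value), .id = "scale")
shapiro_summary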

#histogram and QQplot
psych::pairs.panels(fgpdata_Subscales[c("OC_AVG", "SS_AVG", "PSNQ_AVG", "BRS_AVG", "SBS_AVG", "PF_AVG", "CS_AVG" )], stars = TRUE, lm = TRUE)


# Add correlation matrix analysis for multicollinearity
cor_matrix <- cor(fgpdata_Subscales[c("OC_AVG", "SS_AVG", "PSNQ_AVG", "BRS_AVG", "SBS_AVG", "PF_AVG", "CS_AVG")], use="pairwise.complete.obs")
print(cor_matrix)
##              OC_AVG    SS_AVG  PSNQ_AVG    BRS_AVG   SBS_AVG    PF_AVG
## OC_AVG   1.00000000 0.2131389 0.2090145 0.03434331 0.2473263 0.1913719
## SS_AVG   0.21313893 1.0000000 0.6478417 0.18695464 0.6562127 0.6557773
## PSNQ_AVG 0.20901445 0.6478417 1.0000000 0.14791338 0.7221333 0.7991001
## BRS_AVG  0.03434331 0.1869546 0.1479134 1.00000000 0.1641415 0.3068954
## SBS_AVG  0.24732634 0.6562127 0.7221333 0.16414153 1.0000000 0.7378996
## PF_AVG   0.19137187 0.6557773 0.7991001 0.30689541 0.7378996 1.0000000
## CS_AVG   0.20390353 0.3922572 0.6444424 0.14895883 0.4754319 0.6015086
##             CS_AVG
## OC_AVG   0.2039035
## SS_AVG   0.3922572
## PSNQ_AVG 0.6444424
## BRS_AVG  0.1489588
## SBS_AVG  0.4754319
## PF_AVG   0.6015086
## CS_AVG   1.0000000
# Optional: format the correlation matrix for better readability
round(cor_matrix, 2)
##          OC_AVG SS_AVG PSNQ_AVG BRS_AVG SBS_AVG PF_AVG CS_AVG
## OC_AVG     1.00   0.21     0.21    0.03    0.25   0.19   0.20
## SS_AVG     0.21   1.00     0.65    0.19    0.66   0.66   0.39
## PSNQ_AVG   0.21   0.65     1.00    0.15    0.72   0.80   0.64
## BRS_AVG    0.03   0.19     0.15    1.00    0.16   0.31   0.15
## SBS_AVG    0.25   0.66     0.72    0.16    1.00   0.74   0.48
## PF_AVG     0.19   0.66     0.80    0.31    0.74   1.00   0.60
## CS_AVG     0.20   0.39     0.64    0.15    0.48   0.60   1.00
#Mahalanobis distance test
fgpdata_Subscales$Mahal <- psych::outlier(fgpdata_Subscales[c("OC_AVG", "SS_AVG", "PSNQ_AVG", "BRS_AVG", "SBS_AVG", "PF_AVG", "CS_AVG" )])

psych::describe(fgpdata_Subscales$Mahal)
##    vars   n mean   sd median trimmed mad  min   max range skew kurtosis   se
## X1    1 257 6.96 6.77   4.83    5.67 3.4 0.68 54.25 53.57 3.12    13.29 0.42
# creates a variable indicating TRUE or FALSE if an item is an
# outlier
fgpdata_scored$MOutlier <- dplyr::if_else(fgpdata_Subscales$Mahal > (median(fgpdata_Subscales$Mahal) +
    (3 * sd(fgpdata_Subscales$Mahal))), TRUE, FALSE)

# shows us the first 6 rows of the data so we can see the new
# variables (Mahal, MOutlier)
head(fgpdata_scored)
## # A tibble: 6 × 9
##   OC_AVG SS_AVG PSNQ_AVG BRS_AVG SBS_AVG PF_AVG CS_AVG    ID MOutlier
##    <dbl>  <dbl>    <dbl>   <dbl>   <dbl>  <dbl>  <dbl> <int> <lgl>   
## 1    3.5   4       NA       3.33    3.14   3.88   3.25     1 FALSE   
## 2    4     5.5      5.9     3.33    4.43   6     NA        2 FALSE   
## 3   NA     5.75     5.33    2.83    3.57   5.12   4.2      3 FALSE   
## 4    3     3.08     2.9    NA       2.57   3      2.4      4 FALSE   
## 5   NA     4        4.8     2       3.14   3.62   3.2      5 FALSE   
## 6   NA     4.82     5.6     3.6     3.86   6      4        6 FALSE
# Count the number of outliers and non-outliers
OutlierCount <- fgpdata_scored %>%
    dplyr::count(MOutlier)

# Number of outliers
num_outliers <- OutlierCount %>% filter(MOutlier == TRUE) %>% pull(n)

# Number of non-outliers
num_non_outliers <- OutlierCount %>% filter(MOutlier == FALSE) %>% pull(n)

# Alternatively, calculate directly
num_outliers <- sum(fgpdata_scored$MOutlier == TRUE)
num_non_outliers <- sum(fgpdata_scored$MOutlier == FALSE)

# Print the results
cat("Number of outliers:", num_outliers, "\n")
## Number of outliers: 8
cat("Number of non-outliers:", num_non_outliers, "\n")
## Number of non-outliers: 249
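The cutoff used above (the median of the Mahalanobis distances plus three standard deviations) is one defensible rule; a chi-square critical value is a common alternative. A minimal sketch, assuming df = 7 for the seven scale scores and an alpha of .001 (chisq_cut and MOutlier_chisq are hypothetical names):

# Sketch: chi-square-based Mahalanobis cutoff, for comparison only;
# this document's analysis retains the median + 3*SD rule
chisq_cut <- qchisq(p = 0.999, df = 7)
fgpdata_scored$MOutlier_chisq <- fgpdata_Subscales$Mahal > chisq_cut
table(fgpdata_scored$MOutlier_chisq, useNA = "ifany")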

At this stage, we visually inspected the data and removed duplicate rows again, resulting in a final dataset of 230 observations.

# Create a new dataframe with only the non-outliers (assuming non-outliers are marked as FALSE in the MOutlier column)
non_outliers_df <- fgpdata_scored %>% 
    dplyr::filter(MOutlier == FALSE)

# View the first few rows of the non-outliers dataframe
head(non_outliers_df)
## # A tibble: 6 × 9
##   OC_AVG SS_AVG PSNQ_AVG BRS_AVG SBS_AVG PF_AVG CS_AVG    ID MOutlier
##    <dbl>  <dbl>    <dbl>   <dbl>   <dbl>  <dbl>  <dbl> <int> <lgl>   
## 1    3.5   4       NA       3.33    3.14   3.88   3.25     1 FALSE   
## 2    4     5.5      5.9     3.33    4.43   6     NA        2 FALSE   
## 3   NA     5.75     5.33    2.83    3.57   5.12   4.2      3 FALSE   
## 4    3     3.08     2.9    NA       2.57   3      2.4      4 FALSE   
## 5   NA     4        4.8     2       3.14   3.62   3.2      5 FALSE   
## 6   NA     4.82     5.6     3.6     3.86   6      4        6 FALSE
# Optionally, check the number of rows to confirm the count of retained observations
cat("Number of non-outliers:", nrow(non_outliers_df), "\n")
## Number of non-outliers: 249
# Count the number of outliers and non-outliers
OutlierCount <- non_outliers_df %>%
    dplyr::count(MOutlier)

# Number of outliers
num_outliers <- OutlierCount %>% filter(MOutlier == TRUE) %>% pull(n)

# Number of non-outliers
num_non_outliers <- OutlierCount %>% filter(MOutlier == FALSE) %>% pull(n)

# Alternatively, calculate directly
num_outliers <- sum(non_outliers_df$MOutlier == TRUE)
num_non_outliers <- sum(non_outliers_df$MOutlier == FALSE)

# Print the results
cat("Number of outliers:", num_outliers, "\n")
## Number of outliers: 0
cat("Number of non-outliers:", num_non_outliers, "\n")
## Number of non-outliers: 249

MULTICOLLINEARITY

# Check multicollinearity among all variables in the SEM model
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
# Clean data for analysis
vif_data <- na.omit(fgpdata_Subscales[c("OC_AVG", "SS_AVG", "PSNQ_AVG", "BRS_AVG")])

# 1. First, check correlation matrix - this is a simple way to identify potential multicollinearity
cor_matrix <- cor(vif_data)
print("Correlation matrix among predictors and mediators:")
## [1] "Correlation matrix among predictors and mediators:"
print(cor_matrix)
##              OC_AVG    SS_AVG  PSNQ_AVG    BRS_AVG
## OC_AVG   1.00000000 0.2086877 0.2042018 0.03439422
## SS_AVG   0.20868771 1.0000000 0.6351809 0.18485230
## PSNQ_AVG 0.20420185 0.6351809 1.0000000 0.14304933
## BRS_AVG  0.03439422 0.1848523 0.1430493 1.00000000
# 2. For a more formal VIF analysis, we need models with multiple predictors
# For the mediator relationships
mediator_model1 <- lm(PSNQ_AVG ~ SS_AVG + OC_AVG, data=vif_data)
cat("\nVIF for Perceived Support Network Quality model:\n")
## 
## VIF for Perceived Support Network Quality model:
vif(mediator_model1)
##   SS_AVG   OC_AVG 
## 1.045534 1.045534
mediator_model2 <- lm(BRS_AVG ~ SS_AVG + OC_AVG, data=vif_data)
cat("\nVIF for Resilience model:\n")
## 
## VIF for Resilience model:
vif(mediator_model2)
##   SS_AVG   OC_AVG 
## 1.045534 1.045534
# 3. For the outcome variables (checking all predictors)
# Create a complete dataset for all variables
full_data <- na.omit(fgpdata_Subscales[c("OC_AVG", "SS_AVG", "PSNQ_AVG", "BRS_AVG", 
                                        "SBS_AVG", "PF_AVG", "CS_AVG")])

# For each outcome
outcome_model1 <- lm(SBS_AVG ~ SS_AVG + PSNQ_AVG + BRS_AVG + OC_AVG, data=full_data)
cat("\nVIF for Sense of Belonging outcome model:\n")
## 
## VIF for Sense of Belonging outcome model:
vif(outcome_model1)
##   SS_AVG PSNQ_AVG  BRS_AVG   OC_AVG 
## 1.720826 1.693303 1.036606 1.054938
outcome_model2 <- lm(PF_AVG ~ SS_AVG + PSNQ_AVG + BRS_AVG + OC_AVG, data=full_data)
cat("\nVIF for Professional Flourishing outcome model:\n")
## 
## VIF for Professional Flourishing outcome model:
vif(outcome_model2)
##   SS_AVG PSNQ_AVG  BRS_AVG   OC_AVG 
## 1.720826 1.693303 1.036606 1.054938
outcome_model3 <- lm(CS_AVG ~ SS_AVG + PSNQ_AVG + BRS_AVG + OC_AVG, data=full_data)
cat("\nVIF for Career Satisfaction outcome model:\n")
## 
## VIF for Career Satisfaction outcome model:
vif(outcome_model3)
##   SS_AVG PSNQ_AVG  BRS_AVG   OC_AVG 
## 1.720826 1.693303 1.036606 1.054938
# 4. Check the correlation between outcome variables
cat("\nCorrelation between outcome variables:\n")
## 
## Correlation between outcome variables:
cor_outcomes <- cor(full_data[c("SBS_AVG", "PF_AVG", "CS_AVG")])
print(cor_outcomes)
##           SBS_AVG    PF_AVG    CS_AVG
## SBS_AVG 1.0000000 0.7391738 0.4639356
## PF_AVG  0.7391738 1.0000000 0.5874109
## CS_AVG  0.4639356 0.5874109 1.0000000
# Interpret the results
cat("\nVIF values > 5 suggest moderate multicollinearity")
## 
## VIF values > 5 suggest moderate multicollinearity
cat("\nVIF values > 10 suggest severe multicollinearity\n")
## 
## VIF values > 10 suggest severe multicollinearity
cat("\nCorrelations > 0.7 between predictors suggest potential multicollinearity issues\n")
## 
## Correlations > 0.7 between predictors suggest potential multicollinearity issues

All VIF values here fall between 1.04 and 1.72, and no correlation among the predictors and mediators exceeds about .65, so multicollinearity is not a concern in these models.
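Because the three outcome models share the same predictor set, the VIFs can also be computed in one pass; a minimal sketch over the outcomes above (vif_list is a hypothetical name):

# Sketch: fit each outcome model and collect its VIFs in a named list
outcomes <- c("SBS_AVG", "PF_AVG", "CS_AVG")
vif_list <- lapply(outcomes, function(y) {
  m <- lm(reformulate(c("SS_AVG", "PSNQ_AVG", "BRS_AVG", "OC_AVG"), response = y),
          data = full_data)
  car::vif(m)
})
names(vif_list) <- outcomes
vif_list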

FINAL CSV FILE OF CLEANED AND SCORED DATA WITH DEMOGRAPHICS, AVERAGES, AND TOTALS

# Merge the non-outlier scale scores with the full dataset on the shared "ID" key
merged_df <- merge(non_outliers_df, dat2, by = "ID")

# Select only the desired columns
fgpdata_dem_avg <- merged_df[, c(
  "OC_AVG.x", "SBS_AVG.x", "SS_AVG.x", "BRS_AVG.x", "PSNQ_AVG.x", "PF_AVG.x", "CS_AVG.x", 
  "employment_status", "employment_status_TEXT", "PD", "SS_13_TEXT", "age", "year_education", 
  "education", "education_9_TEXT", "religion", "religion_15_TEXT", "income", 
  "fam_income", "race", "race_10_TEXT", "US_born", "sexual_orientation", 
  "sexual_orientation_9_TEXT", "gender", "gender_8_TEXT"
)]

names(fgpdata_dem_avg) <- c(
  "OC_AVG", "SBS_AVG", "SS_AVG", "BRS_AVG", "PSNQ_AVG", "PF_AVG", "CS_AVG", 
  "employment_status", "employment_status_TEXT", "PD", "SS_13_TEXT", "age",
  "year_education", "education", "education_9_TEXT", "religion", 
  "religion_15_TEXT", "income", "fam_income", "race", "race_10_TEXT", 
  "US_born", "sexual_orientation", "sexual_orientation_9_TEXT", 
  "gender", "gender_8_TEXT"
)

write.csv(fgpdata_dem_avg, "fgpdata_dem_avg.csv", row.names = FALSE)