Minu’s Poster Analysis

Load Libraries

This is where we load the libraries that we use in our analysis.

library(psych)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(kableExtra)

Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':

    group_rows
library(nFactors)
Loading required package: lattice

Attaching package: 'nFactors'
The following object is masked from 'package:lattice':

    parallel

Load Data

This is where we load the data and do a bit of cleaning. We remove the extra header rows that Qualtrics adds, flag which sample each case came from, and remove participants who didn’t reach the end of the survey. We then combine both samples into a single data frame.

# SONA sample: save the Qualtrics question labels, drop the two header rows,
# keep completed responses only, and flag the sample source
df1 <- read.csv(file="data/Navigating Psych (SONA)_December 12, 2024_08.15.csv", header=T)
labels1 <- data.frame(t(cbind(df1[1,],samp="Sample Source")))
df1 <- df1[-c(1,2), ]
df1 <- subset(df1, Progress == "100", select=-c(4,14:15))
df1$samp <- "SONA"

# Prolific sample: same cleaning, plus the Q45/Q46 screening criteria,
# a separate data-quality subset, and renaming the Prolific ID column
df2 <- read.csv(file="data/Navigating psych (Prolific Final)_December 5, 2024_07.40.csv", header=T)
labels2 <- data.frame(t(cbind(df2[1,],samp="Sample Source")))
df2 <- df2[-c(1,2), ]
df2_quality <- subset(df2, select=c(8,173:175))
df2 <- subset(df2, Progress == "100" & Q45 == 2 & Q46 < 4, select=-c(15:17,173:175))
colnames(df2)[169] <- "id"
df2$samp <- "Prolific"

df <- rbind.data.frame(df1,df2)
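
rbind.data.frame() requires both frames to have identical column names, so a quick structural check can catch drift between the two Qualtrics exports. A minimal sketch (the stopifnot() call is our addition, not part of the original script):

# Halt if the two cleaned exports no longer line up column-for-column
stopifnot(identical(colnames(df1), colnames(df2)))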

Attention Checks

This is where we check responses to the attention-check items and remove anyone who didn’t respond correctly.

attn <- subset(labels1, grepl("attention", X1))
attn_chk <- rownames(attn)

# Correct answers for the five attention-check items
correct_responses <- c(3, 2, 5, 1, 1)

# Count how many checks each participant answered correctly
# (the columns are character, so correct_responses is coerced for the comparison)
correct_counts <- apply(df[attn_chk], 1, function(row) sum(row == correct_responses))

# Display results
table(correct_counts)
correct_counts
  0   1   2   3   4   5 
 11  13  31  37  21 402 
df$attn <- correct_counts

# Keep participants who passed at least 4 of the 5 checks
df <- subset(df, attn > 3)

# Participant IDs flagged for exclusion
bad_ids <- c("663e9d84425d4157a22f3167",
            "666f47d2c81ed466b97cd82d",
            "634ef87aec0966557c825573",
            "669d8f03a8742ea4d11b35d8",
            "6701a4d42a2e8c434e0fc50a",
            "671736c2a43ff993ff9a070b",
            "67348c5f2fd2234d5e8f7d58")

df <- subset(df, !(id %in% bad_ids))
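
After the attention-check and ID exclusions, it is worth confirming how many participants remain in each sample. A quick tabulation (our addition; counts not shown in the original output):

# Remaining sample sizes by source
table(df$samp)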

Descriptives

Subset

Here we take the analytical-thinking items and psych-topic items out of the larger data set to analyze them separately.

rm(df1, df2)

# Reuse the name df2 for the item-level subset: psych-topic items (Q27.1) plus analytical items (Q10-Q17)
df2 <- subset(df, select = grep("^Q27.1|Q10|Q11|Q12|Q13|Q14|Q15|Q16|Q17", colnames(df), value = TRUE))
df2 <- subset(df2, select = setdiff(colnames(df2), attn_chk))

Clean Analytical

We needed to turn the free-response analytical-thinking answers into yes/no coded variables. We started recoding in R but switched to Excel because it was easier; a compact R alternative is sketched after the commented-out code below.

# table(df2$Q10)
# df2$Q10_rc <- 0
# df2$Q10_rc[df2$Q10 == "200"] <- 1
# df2$Q10_rc[df2$Q10 == "200 "] <- 1
# df2$Q10_rc[df2$Q10 == "200 crate"] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates"] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates "] <- 1
# df2$Q10_rc[df2$Q10 == "200 Crates"] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates of oranges"] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates of oranges remained "] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates were left"] <- 1
# df2$Q10_rc[df2$Q10 == "200 crates were left "] <- 1
# df2$Q10_rc[df2$Q10 == "200 were left"] <- 1
# 
# 
# table(df2$Q10,df2$Q10_rc)
# 
# table(df$Q11)
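
For reference, the exact-string matching above could be collapsed with a regular expression. A minimal sketch (the pattern is our assumption and was not validated against the full set of free responses):

# Score Q10 correct if the trimmed answer starts with "200" as a whole word
# (tolerates trailing text like "crates of oranges")
df2$Q10_rc <- as.integer(grepl("^200\\b", trimws(df2$Q10)))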

df2$id <- 1:nrow(df2)
# write.csv(subset(df2, select=c(1:8,id)), file="analytic.csv", row.names = F)
# Read the hand-coded yes/no (_rc) variables back in and merge them on id
analytic <- read.csv(file="analytic.csv", header=T)
analytic2 <- subset(analytic, select=c(id,grep("_rc", colnames(analytic))))
df3 <- merge(df2,analytic2,by="id")

Calculate scores

Calculate the simple and analytical (logic) thinking scores by summing the recoded items.

df3$simple <- rowSums(subset(df3, select=c(Q10_rc,Q11_rc,Q12_rc,Q13_rc)))
df3$logic <- rowSums(subset(df3, select=c(Q14_rc,Q15_rc,Q16_rc,Q17_rc)))

Describe

We had to recode our variables as numeric so we could calculate descriptive statistics. We used the describe() command from psych to view descriptives and check skew and kurtosis.

# Convert all columns in df2 to numeric
df4 <- df3
df4[] <- lapply(df3, function(x) as.numeric(x))
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
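
The eight coercion warnings likely come from the eight free-response columns (Q10 through Q17), which hold text that cannot be parsed as numbers; those columns are dropped in the next step anyway. Converting only the columns we actually need would avoid the warnings. A minimal sketch of that alternative:

# Convert only the topic items and the two score columns,
# leaving the free-text Q10-Q17 answers untouched
num_cols <- c(grep("^Q27", colnames(df3), value = TRUE), "simple", "logic")
df4 <- df3
df4[num_cols] <- lapply(df3[num_cols], as.numeric)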
df4 <- subset(df4, select=c(grep("Q27", colnames(df4)),simple,logic))
# Get descriptive statistics for df2
desc_stats <- describe(df4)

# Create a new column to identify rows with skew or kurtosis greater than 2
desc_stats <- desc_stats %>%
  mutate(highlight = ifelse(abs(skew) > 2 | abs(kurtosis) > 2, "highlight", "no"))

# Create a table with highlighted rows for high skewness or kurtosis
desc_stats %>%
  kable(format = "html", digits = 2) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(desc_stats$highlight == "highlight"), background = "red")
vars n mean sd median trimmed mad min max range skew kurtosis se highlight
Q27.1_1 1 416 2.41 1.06 2 2.33 1.48 1 5 4 0.77 -0.04 0.05 no
Q27.1_2 2 413 2.44 1.22 2 2.33 1.48 1 5 4 0.61 -0.62 0.06 no
Q27.1_3 3 414 2.66 1.17 2 2.60 1.48 1 5 4 0.40 -0.72 0.06 no
Q27.1_4 4 413 2.86 1.26 3 2.82 1.48 1 5 4 0.28 -1.02 0.06 no
Q27.1_5 5 413 2.43 1.22 2 2.33 1.48 1 5 4 0.50 -0.77 0.06 no
Q27.1_6 6 415 2.38 1.23 2 2.27 1.48 1 5 4 0.60 -0.69 0.06 no
Q27.1_7 7 411 3.16 1.19 3 3.17 1.48 1 5 4 -0.04 -0.97 0.06 no
Q27.1_8 8 413 2.75 1.26 3 2.69 1.48 1 5 4 0.21 -1.03 0.06 no
Q27.1_9 9 415 2.22 1.16 2 2.09 1.48 1 5 4 0.84 -0.18 0.06 no
Q27.1_10 10 415 2.07 1.07 2 1.92 1.48 1 5 4 0.98 0.29 0.05 no
Q27.1_11 11 416 2.33 1.16 2 2.22 1.48 1 5 4 0.72 -0.31 0.06 no
Q27.1_12 12 416 2.48 1.21 2 2.39 1.48 1 5 4 0.49 -0.71 0.06 no
Q27.1_13 13 415 2.11 1.13 2 1.96 1.48 1 5 4 0.90 0.04 0.06 no
Q27.1_14 14 413 2.50 1.26 2 2.39 1.48 1 5 4 0.49 -0.83 0.06 no
Q27.1_16 15 414 2.80 1.15 3 2.77 1.48 1 5 4 0.21 -0.84 0.06 no
Q27.1_17 16 415 2.27 1.16 2 2.13 1.48 1 5 4 0.81 -0.17 0.06 no
Q27.1_18 17 416 2.65 1.19 2 2.59 1.48 1 5 4 0.36 -0.87 0.06 no
Q27.1_19 18 415 2.32 1.22 2 2.19 1.48 1 5 4 0.73 -0.47 0.06 no
Q27.1_20 19 416 2.24 1.14 2 2.10 1.48 1 5 4 0.84 -0.04 0.06 no
Q27.1_21 20 413 2.34 1.12 2 2.25 1.48 1 5 4 0.65 -0.39 0.06 no
Q27.1_22 21 414 2.65 1.16 2 2.59 1.48 1 5 4 0.45 -0.67 0.06 no
Q27.1_23 22 416 2.45 1.19 2 2.36 1.48 1 5 4 0.51 -0.68 0.06 no
Q27.1_24 23 416 2.88 1.21 3 2.84 1.48 1 5 4 0.20 -0.93 0.06 no
Q27.1_25 24 415 2.27 1.18 2 2.15 1.48 1 5 4 0.69 -0.48 0.06 no
simple 25 420 3.69 0.62 4 3.82 0.00 0 4 4 -2.66 9.41 0.03 highlight
logic 26 420 2.46 1.03 3 2.51 1.48 0 4 4 -0.58 -0.27 0.05 no

Histograms

hist(df3$simple)

hist(df3$logic)

Factor Analysis

We used factor analysis to identify latent variables, i.e., to show which observed variables move together because of a shared underlying cause.

d <- na.omit(subset(df4, select=c(grep("Q27",colnames(df4)))))

names(d)
 [1] "Q27.1_1"  "Q27.1_2"  "Q27.1_3"  "Q27.1_4"  "Q27.1_5"  "Q27.1_6" 
 [7] "Q27.1_7"  "Q27.1_8"  "Q27.1_9"  "Q27.1_10" "Q27.1_11" "Q27.1_12"
[13] "Q27.1_13" "Q27.1_14" "Q27.1_16" "Q27.1_17" "Q27.1_18" "Q27.1_19"
[19] "Q27.1_20" "Q27.1_21" "Q27.1_22" "Q27.1_23" "Q27.1_24" "Q27.1_25"
labs <- c("Depression",
"Love",
"Toxic People",
"Anxiety",
"Dating",
"Attraction",
"Health & Wellbeing",
"Self",
"Narcissism",
"Interpersonal Abuse",
"Intelligence",
"Trauma",
"Psychiatric Symptoms",
"Friendship",
"Helpful Advice",
"Introversion",
"Good & Bad Habits",
"Family",
"Language & Communication",
"Manipulativeness",
"Facts & Information",
"Childhood",
"Changing & Improving Your Life",
"Fakeness")

colnames(d) <- labs

ev <- eigen(cor(d)) # get eigenvalues
ap <- parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # parallel analysis: another perspective on how many factors to retain
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea) # combine the factor-retention criteria
plotnScree(nS) # draw the scree plot; look for the elbow

EFA <- factanal(d, factors = 3, rotation = "promax")
print(EFA, digits=3, cutoff=.4, sort=T)

Call:
factanal(x = d, factors = 3, rotation = "promax")

Uniquenesses:
                    Depression                           Love 
                         0.661                          0.356 
                  Toxic People                        Anxiety 
                         0.381                          0.540 
                        Dating                     Attraction 
                         0.326                          0.263 
            Health & Wellbeing                           Self 
                         0.519                          0.428 
                    Narcissism            Interpersonal Abuse 
                         0.437                          0.298 
                  Intelligence                         Trauma 
                         0.528                          0.322 
          Psychiatric Symptoms                     Friendship 
                         0.389                          0.380 
                Helpful Advice                   Introversion 
                         0.364                          0.506 
             Good & Bad Habits                         Family 
                         0.321                          0.478 
      Language & Communication               Manipulativeness 
                         0.412                          0.384 
           Facts & Information                      Childhood 
                         0.482                          0.422 
Changing & Improving Your Life                       Fakeness 
                         0.399                          0.447 

Loadings:
                               Factor1 Factor2 Factor3
Self                            0.591                 
Intelligence                    0.609                 
Helpful Advice                  0.818                 
Introversion                    0.653                 
Good & Bad Habits               0.946                 
Language & Communication        0.727                 
Facts & Information             0.700                 
Changing & Improving Your Life  0.858                 
Fakeness                        0.509                 
Depression                              0.519         
Toxic People                            0.680         
Narcissism                              0.754         
Interpersonal Abuse                     0.963         
Trauma                                  0.862         
Psychiatric Symptoms                    0.879         
Manipulativeness                        0.619         
Love                                            0.757 
Dating                                          0.849 
Attraction                                      0.821 
Anxiety                                 0.422         
Health & Wellbeing                                    
Friendship                      0.489                 
Family                                  0.470         
Childhood                       0.411   0.450         

               Factor1 Factor2 Factor3
SS loadings      5.526   5.027   2.429
Proportion Var   0.230   0.209   0.101
Cumulative Var   0.230   0.440   0.541

Factor Correlations:
        Factor1 Factor2 Factor3
Factor1   1.000   0.657   0.772
Factor2   0.657   1.000   0.623
Factor3   0.772   0.623   1.000

Test of the hypothesis that 3 factors are sufficient.
The chi square statistic is 781.34 on 207 degrees of freedom.
The p-value is 1.37e-67 
# Factor 2 (dysfunction): items 1, 3, 4, 9, 10, 12, 13, 19, 21
# Factor 3 (love): items 2, 5, 6

df4$selfhelp <- rowMeans(subset(df4, select=c("Q27.1_8","Q27.1_11","Q27.1_16","Q27.1_17","Q27.1_18","Q27.1_20","Q27.1_22","Q27.1_24","Q27.1_25")))
df4$dysfunction <- rowMeans(subset(df4, select=c("Q27.1_1","Q27.1_3","Q27.1_4","Q27.1_9","Q27.1_10","Q27.1_12","Q27.1_13","Q27.1_19","Q27.1_21")))
df4$love <- rowMeans(subset(df4, select=c("Q27.1_2","Q27.1_5","Q27.1_6")))
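
As a quick check on whether these averaged scales hang together, one could compute Cronbach’s alpha for each item set with psych’s alpha() (our addition, using the groupings above; not part of the original analysis):

# Internal consistency of each scale, e.g. the love scale
alpha(subset(df4, select=c("Q27.1_2","Q27.1_5","Q27.1_6")))$total$raw_alpha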

Correlation Test

Here we test which topic scores correlate with the simple/analytical thinking scores, and how the topic scores correlate with one another.

str(df4)
'data.frame':   420 obs. of  29 variables:
 $ Q27.1_1    : num  3 3 3 2 3 2 2 2 4 1 ...
 $ Q27.1_2    : num  4 4 5 5 3 2 2 1 2 2 ...
 $ Q27.1_3    : num  4 4 3 1 3 2 3 2 1 3 ...
 $ Q27.1_4    : num  4 4 5 5 3 2 3 2 5 3 ...
 $ Q27.1_5    : num  4 2 5 5 4 2 3 3 3 3 ...
 $ Q27.1_6    : num  4 2 5 5 5 2 2 3 1 3 ...
 $ Q27.1_7    : num  4 3 5 4 3 3 1 4 5 2 ...
 $ Q27.1_8    : num  4 4 4 3 5 3 1 3 4 2 ...
 $ Q27.1_9    : num  3 4 3 2 5 1 2 2 1 2 ...
 $ Q27.1_10   : num  2 4 1 1 4 2 2 2 1 2 ...
 $ Q27.1_11   : num  2 4 5 4 3 1 1 1 2 2 ...
 $ Q27.1_12   : num  2 4 5 1 4 2 3 2 2 3 ...
 $ Q27.1_13   : num  2 3 3 3 3 1 1 2 1 2 ...
 $ Q27.1_14   : num  4 3 5 5 3 2 2 2 2 3 ...
 $ Q27.1_16   : num  4 4 4 5 4 2 1 3 3 3 ...
 $ Q27.1_17   : num  3 4 1 4 2 1 1 1 1 3 ...
 $ Q27.1_18   : num  3 2 4 4 3 2 1 3 2 3 ...
 $ Q27.1_19   : num  4 3 4 3 3 1 1 1 1 2 ...
 $ Q27.1_20   : num  3 2 4 3 5 2 1 1 2 4 ...
 $ Q27.1_21   : num  2 4 4 3 4 2 2 2 2 4 ...
 $ Q27.1_22   : num  3 4 5 3 1 2 1 3 2 3 ...
 $ Q27.1_23   : num  3 3 4 3 3 2 1 2 2 3 ...
 $ Q27.1_24   : num  4 4 4 3 4 2 3 3 3 3 ...
 $ Q27.1_25   : num  3 3 4 5 3 2 2 3 3 4 ...
 $ simple     : num  4 4 4 3 4 4 3 3 4 4 ...
 $ logic      : num  3 4 1 3 2 4 3 3 3 2 ...
 $ selfhelp   : num  3.22 3.44 3.89 3.78 3.33 ...
 $ dysfunction: num  3 3.7 3.6 2.6 3.5 1.7 2.2 1.9 2.3 2.5 ...
 $ love       : num  4 2.67 5 5 4 ...
corr.test(subset(df4, select=c(simple,logic,dysfunction,love,selfhelp)))
Call:corr.test(x = subset(df4, select = c(simple, logic, dysfunction, 
    love, selfhelp)))
Correlation matrix 
            simple logic dysfunction  love selfhelp
simple        1.00  0.22        0.01 -0.05    -0.08
logic         0.22  1.00       -0.14 -0.02    -0.07
dysfunction   0.01 -0.14        1.00  0.61     0.75
love         -0.05 -0.02        0.61  1.00     0.61
selfhelp     -0.08 -0.07        0.75  0.61     1.00
Sample Size 
            simple logic dysfunction love selfhelp
simple         420   420         407  410      407
logic          420   420         407  410      407
dysfunction    407   407         407  403      402
love           410   410         403  410      402
selfhelp       407   407         402  402      407
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
            simple logic dysfunction love selfhelp
simple        0.00  0.00        1.00 0.87     0.58
logic         0.00  0.00        0.04 1.00     0.58
dysfunction   0.81  0.01        0.00 0.00     0.00
love          0.29  0.72        0.00 0.00     0.00
selfhelp      0.12  0.14        0.00 0.00     0.00

 To see confidence intervals of the correlations, print with the short=FALSE option
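
As the output notes, printing with short=FALSE adds confidence intervals for each correlation:

# Same test, printed with confidence intervals for each correlation
print(corr.test(subset(df4, select=c(simple,logic,dysfunction,love,selfhelp))), short=FALSE)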
plot(df4$simple, df4$logic,
     main="Correlation between simple thinking and analytical thinking",
     xlab="Simple thinking",
     ylab="Analytical thinking")

plot(df4$logic,df4$dysfunction)

plot(df4$logic,df4$love)

plot(df4$logic,df4$selfhelp)

plot(df4$dysfunction,df4$love)

plot(df4$selfhelp,df4$love)

plot(df4$dysfunction,df4$selfhelp)
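
The pairwise scatterplots above could also be drawn as a single panel, which makes the overall pattern easier to scan. A minimal alternative sketch:

# All pairwise scatterplots of the five scores in one panel
pairs(subset(df4, select=c(simple,logic,dysfunction,love,selfhelp)))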