This is where we load the libraries that we use in our analysis.
library(psych)library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(kableExtra)
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(nFactors)
Loading required package: lattice
Attaching package: 'nFactors'
The following object is masked from 'package:lattice':
parallel
Load Data
This is where we load the data and do a bit of cleaning. We remove the extra rows from Qualtrics, indicate which sample is which, and remove participants who didn’t reach the end of the survey. ANd then we combine both samples into a single file.
We needed to turn the free response analytical thinking responses into yes/no coded variables. started in R but switched to Excel because it was easier.
We had to recode our variables as numeric so we could calculate descriptive statistics. we used describe command to view descriotives and check skew and kurtosis.
# Convert all columns in df2 to numericdf4 <- df3df4[] <-lapply(df3, function(x) as.numeric(x))
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
df4 <-subset(df4, select=c(grep("Q27", colnames(df4)),simple,logic))# Get descriptive statistics for df2desc_stats <-describe(df4)# Create a new column to identify rows with skew or kurtosis greater than 2desc_stats <- desc_stats %>%mutate(highlight =ifelse(abs(skew) >2|abs(kurtosis) >2, "highlight", "no"))# Create a table with highlighted rows for high skewness or kurtosisdesc_stats %>%kable(format ="html", digits =2) %>%kable_styling(full_width =FALSE, bootstrap_options =c("striped", "hover", "condensed")) %>%row_spec(which(desc_stats$highlight =="highlight"), background ="red")
vars
n
mean
sd
median
trimmed
mad
min
max
range
skew
kurtosis
se
highlight
Q27.1_1
1
416
2.41
1.06
2
2.33
1.48
1
5
4
0.77
-0.04
0.05
no
Q27.1_2
2
413
2.44
1.22
2
2.33
1.48
1
5
4
0.61
-0.62
0.06
no
Q27.1_3
3
414
2.66
1.17
2
2.60
1.48
1
5
4
0.40
-0.72
0.06
no
Q27.1_4
4
413
2.86
1.26
3
2.82
1.48
1
5
4
0.28
-1.02
0.06
no
Q27.1_5
5
413
2.43
1.22
2
2.33
1.48
1
5
4
0.50
-0.77
0.06
no
Q27.1_6
6
415
2.38
1.23
2
2.27
1.48
1
5
4
0.60
-0.69
0.06
no
Q27.1_7
7
411
3.16
1.19
3
3.17
1.48
1
5
4
-0.04
-0.97
0.06
no
Q27.1_8
8
413
2.75
1.26
3
2.69
1.48
1
5
4
0.21
-1.03
0.06
no
Q27.1_9
9
415
2.22
1.16
2
2.09
1.48
1
5
4
0.84
-0.18
0.06
no
Q27.1_10
10
415
2.07
1.07
2
1.92
1.48
1
5
4
0.98
0.29
0.05
no
Q27.1_11
11
416
2.33
1.16
2
2.22
1.48
1
5
4
0.72
-0.31
0.06
no
Q27.1_12
12
416
2.48
1.21
2
2.39
1.48
1
5
4
0.49
-0.71
0.06
no
Q27.1_13
13
415
2.11
1.13
2
1.96
1.48
1
5
4
0.90
0.04
0.06
no
Q27.1_14
14
413
2.50
1.26
2
2.39
1.48
1
5
4
0.49
-0.83
0.06
no
Q27.1_16
15
414
2.80
1.15
3
2.77
1.48
1
5
4
0.21
-0.84
0.06
no
Q27.1_17
16
415
2.27
1.16
2
2.13
1.48
1
5
4
0.81
-0.17
0.06
no
Q27.1_18
17
416
2.65
1.19
2
2.59
1.48
1
5
4
0.36
-0.87
0.06
no
Q27.1_19
18
415
2.32
1.22
2
2.19
1.48
1
5
4
0.73
-0.47
0.06
no
Q27.1_20
19
416
2.24
1.14
2
2.10
1.48
1
5
4
0.84
-0.04
0.06
no
Q27.1_21
20
413
2.34
1.12
2
2.25
1.48
1
5
4
0.65
-0.39
0.06
no
Q27.1_22
21
414
2.65
1.16
2
2.59
1.48
1
5
4
0.45
-0.67
0.06
no
Q27.1_23
22
416
2.45
1.19
2
2.36
1.48
1
5
4
0.51
-0.68
0.06
no
Q27.1_24
23
416
2.88
1.21
3
2.84
1.48
1
5
4
0.20
-0.93
0.06
no
Q27.1_25
24
415
2.27
1.18
2
2.15
1.48
1
5
4
0.69
-0.48
0.06
no
simple
25
420
3.69
0.62
4
3.82
0.00
0
4
4
-2.66
9.41
0.03
highlight
logic
26
420
2.46
1.03
3
2.51
1.48
0
4
4
-0.58
-0.27
0.05
no
Histograms
hist(df3$simple)
hist(df3$logic)
Factor Analysis
used this to identify the latent variables latent variables - show which variables are moving tog because of an underlying cause
d <-na.omit(subset(df4, select=c(grep("Q27",colnames(df4)))))names(d)
labs <-c("Depression","Love","Toxic People","Anxiety","Dating","Attraction","Health & Wellbeing","Self","Narcissism","Interpersonal Abuse","Intelligence","Trauma","Psychiatric Symptoms","Friendship","Helpful Advice","Introversion","Good & Bad Habits","Family","Language & Communication","Manipulativeness","Facts & Information","Childhood","Changing & Improving Your Life","Fakeness")colnames(d) <- labsev <-eigen(cor(d)) # get eigenvaluesap <-parallel(subject=nrow(d),var=ncol(d),rep=100,cent=.05) # run the parallel analysis, gives us another perspective on how many factors should be used in the modelnS <-nScree(x=ev$values, aparallel=ap$eigen$qevpea) # creates the scree plotplotnScree(nS) # shows us the scree plot, look for the elbows