install.packages("tidyverse")
install.packages("car")
install.packages("psych")
library(tidyverse)
library(car)
library(psych)
Let's load the bigfive dataframe from the survey we just completed. You can do this by clicking the file in the LT8 Lab folder or by trying the code below:
bigfive <- read_csv("C:/Users/tc560/Dropbox/Work/LSE/PB130/LT8/Lab/bigfive.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## `Event Index` = col_character(),
## `UTC Date` = col_character(),
## `Local Date` = col_character(),
## `Tree Node Key` = col_character(),
## `Repeat Key` = col_logical(),
## `Participant Public ID` = col_character(),
## `Participant Starting Group` = col_logical(),
## `Participant Status` = col_character(),
## `Participant Completion Code` = col_logical(),
## `Participant External Session ID` = col_logical(),
## `Participant Device Type` = col_character(),
## `Participant Device` = col_character(),
## `Participant OS` = col_character(),
## `Participant Browser` = col_character(),
## `Participant Monitor Size` = col_character(),
## `Participant Viewport Size` = col_character(),
## Checkpoint = col_logical(),
## `Task Name` = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 1 parsing failure.
## row col expected actual file
## 15 -- 115 columns 1 columns 'C:/Users/tc560/Dropbox/Work/LSE/PB130/LT8/Lab/bigfive.csv'
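Note that the path above is specific to my machine. If you have saved bigfive.csv somewhere on your own computer, point read_csv at that location instead. A minimal sketch, assuming the file sits in your current working directory:
# assumes bigfive.csv is in your working directory; adjust the path if it is elsewhere
bigfive <- read_csv("bigfive.csv")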
bigfive
The first step when working with psychological test data like this is to identify and then recode any reverse items. Recall from the lecture that some tools include reverse items to mitigate the impact of acquiescence? Well, this is true of the Big 5 questionnaire. There are several reverse items for each subscale, so we need to recode them so that 5 becomes 1, 4 becomes 2, and so on. Let's do that now with the recode function found in the car package:
# first filter out the incomplete row flagged by the parsing failure above (it is all NAs)
bigfive <-
  bigfive %>%
  filter(e1 >= 1)
# note I specify car:: here because both car and dplyr (in the tidyverse) have a recode function and I want R to use the car version
bigfive$a1 <- car::recode(bigfive$a1r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$c2 <- car::recode(bigfive$c2r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$e2 <- car::recode(bigfive$e2r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$n2 <- car::recode(bigfive$n2r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$a3 <- car::recode(bigfive$a3r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$c4 <- car::recode(bigfive$c4r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$e5 <- car::recode(bigfive$e5r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$c5 <- car::recode(bigfive$c5r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$n5 <- car::recode(bigfive$n5r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$a6 <- car::recode(bigfive$a6r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$e7 <- car::recode(bigfive$e7r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$n7 <- car::recode(bigfive$n7r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$o7 <- car::recode(bigfive$o7r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$a8 <- car::recode(bigfive$a8r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$o9 <- car::recode(bigfive$o9r, "1=5; 2=4; 3=3; 4=2; 5=1")
bigfive$c9 <- car::recode(bigfive$c9r, "1=5; 2=4; 3=3; 4=2; 5=1")
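Writing sixteen near-identical lines works, but it is easy to mistype one. If you prefer, the same recoding can be done in a short loop. This is just a sketch; it assumes the reversed source columns all follow the "<item>r" naming pattern used above and that responses run from 1 to 5, so subtracting from 6 gives the same result as the recode calls:
# items with a reversed "<item>r" counterpart, as listed above
reverse_items <- c("a1", "c2", "e2", "n2", "a3", "c4", "e5", "c5",
                   "n5", "a6", "e7", "n7", "o7", "a8", "o9", "c9")
for (item in reverse_items) {
  # 6 - x maps 1 to 5, 2 to 4, and so on, on a 1-5 response scale
  bigfive[[item]] <- 6 - bigfive[[paste0(item, "r")]]
}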
# select out the items
bigfive <- select(bigfive, e1, e2, e3, e4, e5, e6, e7, e8, c1, c2, c3, c4, c5, c6, c7, c8, c9, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10, a1, a2, a3, a4, a5, a6, a7, a8, a9, n1, n2, n3, n4, n5, n6, n7, n8)
The next step after we have recoded the items is to check that the scales are reliable. As we saw in the lecture, we can estimate the reliability of the Big 5 scales using Cronbach's alpha, which reflects the ratio of true-score variance to the overall variance in the measure. We saw in the lecture that it is calculated as:
r = Vt/V
Where Vt is true score variance, and V is overall variance in the measure.
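For example (hypothetical numbers, not from our data), if 8 of the 10 units of variance in a measure reflect true-score differences, its reliability would be r = 8/10 = .80.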
We can compute Cronbach's alpha using the alpha function in the psych package. Let's do that now for each of our subscales: Openness, Extraversion, Neuroticism, Agreeableness, and Conscientiousness.
extraversion <- select(bigfive, e1, e2, e3, e4, e5, e6, e7, e8)
neuroticism <- select(bigfive, n1, n2, n3, n4, n5, n6, n7, n8)
openness <- select(bigfive, o1, o2, o3, o4, o5, o6, o7, o8, o9, o10)
concienciousness <- select(bigfive, c1, c2, c3, c4, c5, c6, c7, c8, c9)
agreeableness <- select(bigfive, a1, a2, a3, a4, a5, a6, a7, a8, a9)
alpha(neuroticism)
##
## Reliability analysis
## Call: alpha(x = neuroticism)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.83 0.83 0.93 0.37 4.8 0.064 2.9 0.71 0.41
##
## lower alpha upper 95% confidence boundaries
## 0.7 0.83 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## n1 0.82 0.82 0.89 0.40 4.6 0.068 0.078 0.48
## n2 0.81 0.81 0.88 0.38 4.4 0.068 0.087 0.48
## n3 0.80 0.80 0.91 0.36 3.9 0.076 0.083 0.39
## n4 0.76 0.76 0.84 0.31 3.2 0.093 0.068 0.34
## n5 0.79 0.79 0.91 0.34 3.7 0.083 0.092 0.34
## n6 0.87 0.87 0.94 0.49 6.7 0.051 0.041 0.54
## n7 0.81 0.80 0.91 0.37 4.1 0.073 0.088 0.43
## n8 0.79 0.79 0.87 0.35 3.7 0.078 0.065 0.36
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## n1 14 0.57 0.59 0.58 0.449 2.1 0.92
## n2 14 0.64 0.63 0.64 0.516 2.8 1.05
## n3 14 0.74 0.74 0.71 0.645 3.1 0.95
## n4 14 0.91 0.92 0.95 0.860 3.4 1.22
## n5 14 0.82 0.80 0.75 0.702 2.9 1.41
## n6 14 0.24 0.23 0.11 0.068 2.6 1.02
## n7 14 0.68 0.69 0.65 0.572 3.1 0.92
## n8 14 0.78 0.79 0.80 0.706 3.1 0.86
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## n1 0.29 0.43 0.21 0.07 0.00 0
## n2 0.00 0.57 0.14 0.21 0.07 0
## n3 0.00 0.29 0.36 0.29 0.07 0
## n4 0.00 0.36 0.07 0.36 0.21 0
## n5 0.29 0.00 0.43 0.14 0.14 0
## n6 0.07 0.50 0.29 0.07 0.07 0
## n7 0.00 0.29 0.43 0.21 0.07 0
## n8 0.00 0.21 0.50 0.21 0.07 0
alpha(extraversion)
##
## Reliability analysis
## Call: alpha(x = extraversion)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.82 0.83 0.94 0.39 5.1 0.07 3.2 0.65 0.42
##
## lower alpha upper 95% confidence boundaries
## 0.68 0.82 0.96
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## e1 0.80 0.81 0.91 0.38 4.2 0.077 0.089 0.46
## e2 0.79 0.81 0.93 0.38 4.3 0.083 0.089 0.43
## e3 0.77 0.80 0.92 0.36 3.9 0.091 0.083 0.42
## e4 0.79 0.81 0.92 0.38 4.3 0.084 0.073 0.41
## e5 0.77 0.78 0.88 0.34 3.6 0.091 0.089 0.35
## e6 0.81 0.83 0.92 0.40 4.7 0.073 0.086 0.46
## e7 0.79 0.81 0.91 0.38 4.2 0.084 0.091 0.42
## e8 0.86 0.87 0.94 0.49 6.7 0.054 0.033 0.46
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## e1 14 0.68 0.72 0.73 0.62 3.4 0.51
## e2 14 0.72 0.71 0.66 0.61 2.7 0.99
## e3 14 0.81 0.79 0.78 0.71 3.4 1.22
## e4 14 0.74 0.71 0.70 0.65 3.5 0.85
## e5 14 0.85 0.87 0.89 0.77 2.9 1.07
## e6 14 0.65 0.62 0.58 0.48 3.6 1.22
## e7 14 0.74 0.72 0.69 0.65 2.9 0.83
## e8 14 0.25 0.29 0.25 0.07 3.2 0.97
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## e1 0.00 0.00 0.57 0.43 0.00 0
## e2 0.14 0.21 0.43 0.21 0.00 0
## e3 0.00 0.36 0.14 0.29 0.21 0
## e4 0.00 0.07 0.50 0.29 0.14 0
## e5 0.00 0.50 0.14 0.29 0.07 0
## e6 0.07 0.07 0.29 0.29 0.29 0
## e7 0.00 0.36 0.36 0.29 0.00 0
## e8 0.00 0.29 0.29 0.36 0.07 0
alpha(openness)
## Warning in alpha(openness): Some items were negatively correlated with the total scale and probably
## should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( o6 o9 o10 ) were negatively correlated with the total scale and
## probably should be reversed.
## To do this, run the function again with the 'check.keys=TRUE' option
##
## Reliability analysis
## Call: alpha(x = openness)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.55 0.62 0.9 0.14 1.7 0.18 3.6 0.46 0.12
##
## lower alpha upper 95% confidence boundaries
## 0.2 0.55 0.9
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## o1 0.46 0.53 0.88 0.113 1.14 0.21 0.14 0.095
## o2 0.57 0.65 0.89 0.172 1.87 0.17 0.15 0.164
## o3 0.45 0.53 0.88 0.110 1.11 0.22 0.14 0.095
## o4 0.41 0.53 0.85 0.109 1.11 0.23 0.15 0.095
## o5 0.38 0.48 0.83 0.095 0.94 0.25 0.14 0.087
## o6 0.66 0.72 0.91 0.224 2.59 0.14 0.12 0.229
## o7 0.60 0.64 0.87 0.165 1.78 0.16 0.13 0.115
## o8 0.41 0.46 0.83 0.087 0.86 0.23 0.13 0.071
## o9 0.61 0.67 0.89 0.182 2.01 0.16 0.15 0.229
## o10 0.56 0.64 0.90 0.164 1.77 0.16 0.16 0.185
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## o1 14 0.637 0.70 0.68 0.506 3.6 0.85
## o2 14 0.149 0.25 0.21 -0.013 4.4 0.74
## o3 14 0.661 0.72 0.70 0.502 4.0 1.04
## o4 14 0.753 0.72 0.73 0.629 3.6 1.01
## o5 14 0.820 0.84 0.85 0.728 3.2 0.97
## o6 14 -0.027 -0.14 -0.21 -0.261 3.8 1.12
## o7 14 0.202 0.30 0.29 -0.032 3.1 1.07
## o8 14 0.846 0.89 0.92 0.790 4.1 0.73
## o9 14 0.237 0.17 0.14 -0.026 2.9 1.21
## o10 14 0.437 0.31 0.23 0.147 3.4 1.40
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## o1 0.00 0.07 0.43 0.36 0.14 0
## o2 0.00 0.00 0.14 0.36 0.50 0
## o3 0.00 0.07 0.29 0.21 0.43 0
## o4 0.00 0.07 0.50 0.14 0.29 0
## o5 0.00 0.29 0.29 0.36 0.07 0
## o6 0.00 0.14 0.29 0.21 0.36 0
## o7 0.07 0.21 0.36 0.29 0.07 0
## o8 0.00 0.00 0.21 0.50 0.29 0
## o9 0.07 0.36 0.29 0.14 0.14 0
## o10 0.14 0.14 0.07 0.43 0.21 0
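The warning above tells us that some openness items (o6, o9, o10) correlate negatively with the total in our data. As the warning itself suggests, one diagnostic option is to rerun alpha with check.keys = TRUE, which automatically reverses any items that correlate negatively with the total. Treat this as a check rather than a fix; if items really are reverse-worded, recode them properly as we did above:
# diagnostic only: let psych flag and reverse negatively keyed items
alpha(openness, check.keys = TRUE)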
alpha(concienciousness)
##
## Reliability analysis
## Call: alpha(x = concienciousness)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.83 0.85 0.97 0.38 5.5 0.065 3.8 0.6 0.45
##
## lower alpha upper 95% confidence boundaries
## 0.7 0.83 0.96
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## c1 0.79 0.81 0.92 0.35 4.3 0.078 0.080 0.45
## c2 0.87 0.88 0.97 0.48 7.5 0.049 0.036 0.52
## c3 0.80 0.82 0.90 0.36 4.6 0.074 0.077 0.41
## c4 0.82 0.84 0.95 0.39 5.1 0.070 0.071 0.46
## c5 0.78 0.81 0.91 0.35 4.2 0.089 0.077 0.41
## c6 0.81 0.83 0.96 0.37 4.8 0.073 0.074 0.41
## c7 0.80 0.82 0.95 0.36 4.6 0.078 0.070 0.36
## c8 0.83 0.85 0.94 0.41 5.5 0.068 0.080 0.51
## c9 0.78 0.81 0.95 0.35 4.3 0.084 0.071 0.37
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## c1 14 0.80 0.82 0.82 0.742 4.3 0.73
## c2 14 0.22 0.19 0.12 0.027 3.3 1.07
## c3 14 0.72 0.75 0.76 0.655 4.5 0.65
## c4 14 0.68 0.63 0.63 0.523 3.4 1.22
## c5 14 0.86 0.83 0.84 0.775 3.6 1.22
## c6 14 0.67 0.71 0.69 0.593 4.2 0.70
## c7 14 0.75 0.75 0.75 0.658 3.7 0.91
## c8 14 0.48 0.55 0.54 0.383 4.1 0.62
## c9 14 0.82 0.81 0.79 0.743 3.2 0.97
##
## Non missing response frequency for each item
## 1 2 3 4 5 miss
## c1 0.00 0.00 0.14 0.43 0.43 0
## c2 0.00 0.29 0.29 0.29 0.14 0
## c3 0.00 0.00 0.07 0.36 0.57 0
## c4 0.07 0.14 0.29 0.29 0.21 0
## c5 0.00 0.29 0.14 0.29 0.29 0
## c6 0.00 0.00 0.14 0.50 0.36 0
## c7 0.00 0.07 0.36 0.36 0.21 0
## c8 0.00 0.00 0.14 0.64 0.21 0
## c9 0.00 0.21 0.50 0.14 0.14 0
alpha(agreeableness)
##
## Reliability analysis
## Call: alpha(x = agreeableness)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.84 0.94 0.38 5.4 0.057 4 0.61 0.44
##
## lower alpha upper 95% confidence boundaries
## 0.74 0.85 0.96
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## a1 0.81 0.81 0.90 0.34 4.2 0.071 0.078 0.35
## a2 0.87 0.86 0.93 0.44 6.3 0.049 0.064 0.49
## a3 0.82 0.82 0.92 0.36 4.4 0.068 0.072 0.43
## a4 0.80 0.80 0.91 0.33 4.0 0.079 0.060 0.41
## a5 0.82 0.81 0.91 0.35 4.4 0.071 0.070 0.41
## a6 0.84 0.84 0.94 0.39 5.1 0.062 0.071 0.45
## a7 0.84 0.83 0.94 0.38 4.9 0.062 0.084 0.44
## a8 0.83 0.82 0.92 0.36 4.6 0.067 0.078 0.43
## a9 0.86 0.86 0.92 0.43 6.1 0.050 0.063 0.49
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## a1 14 0.83 0.82 0.82 0.76 3.6 0.93
## a2 14 0.34 0.38 0.34 0.20 4.2 0.80
## a3 14 0.76 0.76 0.75 0.68 4.1 0.83
## a4 14 0.89 0.87 0.89 0.84 3.9 1.10
## a5 14 0.80 0.78 0.77 0.72 4.1 1.03
## a6 14 0.65 0.61 0.56 0.52 3.6 1.01
## a7 14 0.63 0.66 0.61 0.55 4.6 0.65
## a8 14 0.74 0.72 0.71 0.66 4.1 0.86
## a9 14 0.36 0.41 0.39 0.23 4.0 0.78
##
## Non missing response frequency for each item
## 2 3 4 5 miss
## a1 0.07 0.43 0.29 0.21 0
## a2 0.07 0.00 0.57 0.36 0
## a3 0.07 0.07 0.57 0.29 0
## a4 0.14 0.21 0.29 0.36 0
## a5 0.14 0.00 0.43 0.43 0
## a6 0.21 0.07 0.57 0.14 0
## a7 0.00 0.07 0.29 0.64 0
## a8 0.07 0.07 0.50 0.36 0
## a9 0.07 0.07 0.64 0.21 0
Are the scales reliable? Use Nunnally's criterion of 0.70 to answer.
Once we have established that the Big 5 scales are reliable, we can move on to aggregating the items to create a variable for each subscale. We can easily do this with the mutate function, as we saw at the beginning of the first semester.
bigfive <- bigfive %>%
mutate(neuroticism = (n1 + n2 + n3 + n4 + n5 + n6 + n7 + n8)/8)
bigfive <- bigfive %>%
mutate(openness = (o1 + o2 + o3 + o4 + o5 + o6 + o7 + o8 + o9 + o10)/10)
bigfive <- bigfive %>%
mutate(extraversion = (e1 + e2 + e3 + e4 + e5 + e6 + e7 + e8)/8)
bigfive <- bigfive %>%
mutate(concienciousness = (c1 + c2 + c3 + c4 + c5 + c6 + c7 + c8 + c9)/9)
bigfive <- bigfive %>%
mutate(agreeableness = (a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9)/9)
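As an aside, if you are running dplyr 1.0.0 or later, the same subscale means can be computed more compactly with across() and rowMeans(). A sketch for two of the subscales (it assumes the item columns follow the prefix-plus-number naming used above):
bigfive <- bigfive %>%
  mutate(
    # average the n1-n8 and e1-e8 columns row by row
    neuroticism  = rowMeans(across(num_range("n", 1:8))),
    extraversion = rowMeans(across(num_range("e", 1:8)))
  )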
# First create dataframe with means
mean_score <- c(mean(bigfive$neuroticism), mean(bigfive$openness), mean(bigfive$extraversion), mean(bigfive$concienciousness), mean(bigfive$agreeableness))
personality <- c("neuroticism", "openness", "extraversion", "concienciousness", "agreeableness")
data <- tibble(mean_score, personality)
## Now create the bar chart
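A minimal sketch of the bar chart, assuming we simply want one bar per trait with the mean subscale score on the y-axis (tweak the labels and theme as you like):
ggplot(data, aes(x = personality, y = mean_score)) +
  geom_col() +
  labs(x = "Big Five trait", y = "Mean score") +
  theme_minimal()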