1 Prep

Load necessary packages.

library(readr)
library(readxl)
library(dplyr)
library(psych)
library(factoextra)
library(GPArotation)

Set Working Directory.

# NOTE(review): absolute, machine-specific path. The relative file names passed
# to read_excel() and read_csv() later in this script resolve against this
# directory, so anyone else running the script must edit this line first.
setwd("/Users/sanshirohogawa/Library/Mobile Documents/com~apple~CloudDocs/Stats Consulting/Abbie")

Load data.

# Read the workbook, keep rows 1-160 and columns 1-15, and store the
# participant IDs as a factor.
dat2 <- read_excel("IDs_Only.xlsx")[1:160, 1:15] %>%
  mutate(Participant = as.factor(Participant))
is.factor(dat2$Participant) # Prints TRUE when the conversion worked
## [1] TRUE

1.1 Check for issues with data

colSums(is.na(dat2))
##                Participant     MasteryGoal_motivation 
##                          0                          0 
##   AvoidanceGoal_motivation PerformanceGoal_motivation 
##                          0                          0 
##    SelfEfficacy_motivation       TaskValue_motivation 
##                          0                          0 
##     Attribution_motivation            Somatic_Anxiety 
##                          0                          0 
##          Avoidance_Anxiety          Cognitive_Anxiety 
##                          0                          0 
##           OSPANabsolute_WM              OSPANtotal_WM 
##                          0                          0 
##               OSPANmath_WM            RSPANpartial_WM 
##                          0                          0 
##              RSPANtotal_WM 
##                          0
rowSums(is.na(dat2))
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0

No missing values

2 PCA for Motivation

Extract relevant columns, and make sure there is only 1 row per 1 participant.

# Subset dat2 to the participant ID plus the six motivation scales
mot <- select(
  dat2,
  Participant,
  all_of(c(
    "MasteryGoal_motivation",
    "AvoidanceGoal_motivation",
    "PerformanceGoal_motivation",
    "SelfEfficacy_motivation",
    "TaskValue_motivation",
    "Attribution_motivation"
  ))
)

# Make sure the variables are stored as numbers.
# Every column except Participant (column 1) is converted in one pass instead
# of repeating the same convert-and-check pair for each variable.
mot[-1] <- lapply(mot[-1], as.numeric)
# Stop with an error if any of the converted columns is still not numeric.
stopifnot(all(vapply(mot[-1], is.numeric, logical(1))))
# Confirm there is only one row for each participant
table(mot$Participant)
## 
##   1  10 100 101 102 103 104 105 106 107 108 109  11 110 111 112 113 114 115 116 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 117 118 119  12 120 121 122 123 124 125 126 127 128 129  13 130 131 132 133 134 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 135 136 137 138 139  14 140 141 142 143 144 145 146 147 148 149  15 150 151 152 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 153 154 155 156 157 158 159  16 160  17  18  19   2  20  21  22  23  24  25  26 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  27  28  29   3  30  31  32  33  34  35  36  37  38  39   4  40  41  42  43  44 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  45  46  47  48  49   5  50  51  52  53  54  55  56  57  58  59   6  60  61  62 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  63  64  65  66  67  68  69   7  70  71  72  73  74  75  76  77  78  79   8  80 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  81  82  83  84  85  86  87  88  89   9  90  91  92  93  94  95  96  97  98  99 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

Make sure there are no missing values.

colSums(is.na(mot))
##                Participant     MasteryGoal_motivation 
##                          0                          0 
##   AvoidanceGoal_motivation PerformanceGoal_motivation 
##                          0                          0 
##    SelfEfficacy_motivation       TaskValue_motivation 
##                          0                          0 
##     Attribution_motivation 
##                          0

No missing values.

Descriptives.

describe(mot)
##                            vars   n  mean    sd median trimmed   mad min max
## Participant*                  1 160 80.50 46.33   80.5   80.50 59.30   1 160
## MasteryGoal_motivation        2 160 22.68  5.80   22.0   22.65  5.93   7  39
## AvoidanceGoal_motivation      3 160 40.29  8.39   41.0   40.34  8.90  17  59
## PerformanceGoal_motivation    4 160 23.83  6.68   24.0   23.84  5.93   3  40
## SelfEfficacy_motivation       5 160 39.71  6.41   40.0   39.48  5.93  24  58
## TaskValue_motivation          6 160 27.86  7.96   27.0   27.63  7.41   7  50
## Attribution_motivation        7 160 19.36  3.34   20.0   19.49  2.97  10  30
##                            range  skew kurtosis   se
## Participant*                 159  0.00    -1.22 3.66
## MasteryGoal_motivation        32  0.11     0.32 0.46
## AvoidanceGoal_motivation      42 -0.12    -0.46 0.66
## PerformanceGoal_motivation    37 -0.08     0.26 0.53
## SelfEfficacy_motivation       34  0.34     0.34 0.51
## TaskValue_motivation          43  0.22     0.13 0.63
## Attribution_motivation        20 -0.24     0.75 0.26

2.1 PCA Version 1

2.1.1 Get the correlation matrix

cor.mat.mot <- cor(mot[, 2:7])

2.1.2 Get the Eigenvalues.

eig.vals.mot <- eigen(cor.mat.mot)$values

2.1.3 Plot Eigenvalues.

plot(eig.vals.mot,
     type = "b",
     main = "Scree Plot",
     xlab = "Index",
     ylab = "Eigenvalue")
abline(h = 1, lty = 2)  # Kaiser reference

The scree plot supports two components (i.e., two circles above the cut off value)

2.2 PCA Version 2

2.2.1 Run PCA (# of components not pre-determined)

pca.motivation <- prcomp(mot[, 2:7], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.motivation) # Look at the Cumulative Proportion
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6
## Standard deviation     1.6583 1.0405 0.9392 0.7749 0.66250 0.49621
## Proportion of Variance 0.4583 0.1804 0.1470 0.1001 0.07315 0.04104
## Cumulative Proportion  0.4583 0.6387 0.7857 0.8858 0.95896 1.00000

PC1 + PC2 explain 63.87% of variance. Let’s go with two components.

2.2.2 Scree plot

fviz_eig(pca.motivation, addlabels = TRUE)

The scree plot supports two components.

2.3 Run PCA with 2 Components

# Two-component PCA of the six motivation scales (columns 2-7 of mot).
# rotate = "oblimin" is an oblique rotation, so the components may correlate;
# scores = TRUE also returns component scores for every row.
pca.mot <- principal(
  r = mot[, 2:7],
  nfactors = 2,
  rotate = "oblimin",
  scores = TRUE
)

2.3.1 PCA Results for Motivation

Loadings (hide loadings below |.40|)

print(pca.mot$loadings, cutoff = .40, sort = TRUE)
## 
## Loadings:
##                            TC1    TC2   
## MasteryGoal_motivation      0.756       
## AvoidanceGoal_motivation   -0.846       
## TaskValue_motivation        0.814       
## PerformanceGoal_motivation         0.528
## SelfEfficacy_motivation            0.722
## Attribution_motivation             0.691
## 
##                  TC1   TC2
## SS loadings    2.152 1.522
## Proportion Var 0.359 0.254
## Cumulative Var 0.359 0.612
  • TC1 = Mastery Goal (positive loading), Avoidance Goal (negative loading), Task Value (TMA)
  • TC2 = Performance Goal, Self Efficacy, Attribution (SAP)

Variance Explained

pca.mot$Vaccounted # Cumulative Variance Explained = 63.87%
##                             TC1       TC2
## SS loadings           2.2311294 1.6012376
## Proportion Var        0.3718549 0.2668729
## Cumulative Var        0.3718549 0.6387278
## Proportion Explained  0.5821805 0.4178195
## Cumulative Proportion 0.5821805 1.0000000

Extract the component scores and save as a dataframe

pca.mot.scores.df <- as.data.frame(pca.mot$scores)

Rename the components. TC1 = TMA, TC2 = SAP for now, but come up with names that best represent each component.

colnames(pca.mot.scores.df) <- c("TMA", "SAP")

Add participant IDs to the dataframe.

pca.mot.scores.df$Participant <- mot$Participant

Combine the original dataset with the component scores based on the Participant IDs.

dat2.mot.scores <- left_join(dat2, pca.mot.scores.df, by = "Participant")

With the new dataset (dat2.mot.scores), you can use TMA and SAP as new variables.

3 PCA for Anxiety

Extract relevant columns, and make sure there is only 1 row per 1 participant.

# Subset dat2 to the participant ID plus the three anxiety scales
anx <- select(
  dat2,
  Participant,
  all_of(c(
    "Somatic_Anxiety",
    "Avoidance_Anxiety",
    "Cognitive_Anxiety"
  ))
)

# Make sure the variables are stored as numbers.
# Every column except Participant (column 1) is converted in one pass instead
# of repeating the same convert-and-check pair for each variable.
anx[-1] <- lapply(anx[-1], as.numeric)
# Stop with an error if any of the converted columns is still not numeric.
stopifnot(all(vapply(anx[-1], is.numeric, logical(1))))
# Confirm there is only one row for each participant
table(anx$Participant)
## 
##   1  10 100 101 102 103 104 105 106 107 108 109  11 110 111 112 113 114 115 116 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 117 118 119  12 120 121 122 123 124 125 126 127 128 129  13 130 131 132 133 134 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 135 136 137 138 139  14 140 141 142 143 144 145 146 147 148 149  15 150 151 152 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 153 154 155 156 157 158 159  16 160  17  18  19   2  20  21  22  23  24  25  26 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  27  28  29   3  30  31  32  33  34  35  36  37  38  39   4  40  41  42  43  44 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  45  46  47  48  49   5  50  51  52  53  54  55  56  57  58  59   6  60  61  62 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  63  64  65  66  67  68  69   7  70  71  72  73  74  75  76  77  78  79   8  80 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  81  82  83  84  85  86  87  88  89   9  90  91  92  93  94  95  96  97  98  99 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

Make sure there are no missing values.

colSums(is.na(anx))
##       Participant   Somatic_Anxiety Avoidance_Anxiety Cognitive_Anxiety 
##                 0                 0                 0                 0

No missing values.

Descriptives.

describe(anx)
##                   vars   n  mean    sd median trimmed   mad min max range  skew
## Participant*         1 160 80.50 46.33   80.5   80.50 59.30   1 160   159  0.00
## Somatic_Anxiety      2 160 22.19  6.62   21.5   22.08  8.15  10  37    27  0.16
## Avoidance_Anxiety    3 160 17.20  3.98   17.0   17.13  4.45   7  29    22  0.19
## Cognitive_Anxiety    4 160 33.92  5.09   35.0   34.12  5.93  19  44    25 -0.36
##                   kurtosis   se
## Participant*         -1.22 3.66
## Somatic_Anxiety      -0.82 0.52
## Avoidance_Anxiety     0.18 0.31
## Cognitive_Anxiety    -0.61 0.40

3.1 PCA Version 1

3.1.1 Get the correlation matrix

cor.mat.anx <- cor(anx[, 2:4])

3.1.2 Get the Eigenvalues.

eig.vals.anx <- eigen(cor.mat.anx)$values

3.1.3 Plot Eigenvalues.

plot(eig.vals.anx,
     type = "b",
     main = "Scree Plot",
     xlab = "Index",
     ylab = "Eigenvalue")
abline(h = 1, lty = 2)  # Kaiser reference

The scree plot supports one component … (i.e., one circle above the cut off value)

3.2 PCA Version 2

3.2.1 Run PCA (# of components not pre-determined)

pca.anxiety <- prcomp(anx[, 2:4], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.anxiety) # Look at the Cumulative Proportion
## Importance of components:
##                           PC1    PC2    PC3
## Standard deviation     1.2975 0.9132 0.6946
## Proportion of Variance 0.5612 0.2780 0.1608
## Cumulative Proportion  0.5612 0.8392 1.0000

PC1 + PC2 explain 83.92% of variance. Let’s go with two components because PC1 only explains 56.12% of variance.

3.2.2 Scree plot

fviz_eig(pca.anxiety, addlabels = TRUE)

The scree plot supports two components.

3.3 Run PCA with 2 Components

# Two-component PCA of the three anxiety scales (columns 2-4 of anx).
# rotate = "oblimin" is an oblique rotation, so the components may correlate;
# scores = TRUE also returns component scores for every row.
pca.anx <- principal(
  r = anx[, 2:4],
  nfactors = 2,
  rotate = "oblimin",
  scores = TRUE
)

3.3.1 PCA Results for Anxiety

Loadings (hide loadings below |.40|)

print(pca.anx$loadings, cutoff = .40, sort = TRUE)
## 
## Loadings:
##                   TC1    TC2   
## Somatic_Anxiety    0.932       
## Avoidance_Anxiety  0.741       
## Cognitive_Anxiety         0.979
## 
##                  TC1   TC2
## SS loadings    1.418 1.055
## Proportion Var 0.473 0.352
## Cumulative Var 0.473 0.825
  • TC1 = Somatic + Avoidance (SA)
  • TC2 = Cognitive (C)

Variance Explained

pca.anx$Vaccounted # Cumulative Variance Explained = 83.92%
##                             TC1       TC2
## SS loadings           1.4403597 1.0772092
## Proportion Var        0.4801199 0.3590697
## Cumulative Var        0.4801199 0.8391896
## Proportion Explained  0.5721232 0.4278768
## Cumulative Proportion 0.5721232 1.0000000

Extract the component scores and save as a dataframe

pca.anx.scores.df <- as.data.frame(pca.anx$scores)

Rename the components. TC1 = SA, TC2 = C for now, but come up with names that best represent each component.

colnames(pca.anx.scores.df) <- c("SA", "C")

Add participant IDs to the dataframe.

pca.anx.scores.df$Participant <- anx$Participant

Combine the original dataset with the component scores based on the Participant IDs.

dat2.anx.scores <- left_join(dat2, pca.anx.scores.df, by = "Participant")

With the new dataset (dat2.anx.scores), you can use SA and C as new variables.

4 PCA for Perceived Task Difficulty

# Import the second dataset; show_col_types = FALSE silences readr's
# column-specification message.
dat3 <- read_csv(file = "Cleaned_RQ2b_Dataset.csv", show_col_types = FALSE)
# Store participant IDs as a factor and confirm the conversion (prints TRUE).
dat3[["Participant"]] <- as.factor(dat3[["Participant"]])
is.factor(dat3[["Participant"]])
## [1] TRUE

4.1 Check for issues with data

colSums(is.na(dat3))
##          Participant                 Task                Genre 
##                    0                    0                    0 
##           Complexity         mentaleffort           difficulty 
##                    0                    1                    1 
##      contentplanning linguisticchallenges          MasteryGoal 
##                    1                    1                    0 
##        AvoidancegOAL      PerformanceGoal         SelfEfficacy 
##                    0                    0                    0 
##            TaskValue          Attribution         SomaticTotal 
##                    0                    0                    0 
##       AvoidanceTotal       CognitiveTotal        OSPANabsolute 
##                    0                    0                    0 
##           OSPANtotal            OSPANmath         RSPANPARTIAL 
##                    0                    0                    0 
##           RSPANTOTAL 
##                    0
rowSums(is.na(dat3))
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0

Missing values present.

which(rowSums(is.na(dat3)) > 0)
## [1] 223

Row 223 has missing values.

# Print the incomplete row(s) directly instead of opening the interactive data
# viewer, so the participant with missing values is visible in the rendered
# output and the check also works when the script is run non-interactively.
dat3[!complete.cases(dat3), ]

Row 223 belongs to Participant 56. Therefore, I will remove all of Participant 56's rows.

# Exclude Participant 56, then discard the factor level that is now unused
dat3 <- dat3 %>%
  filter(Participant != "56") %>%
  mutate(Participant = droplevels(Participant))

# List the participant IDs that remain
unique(dat3$Participant)
##   [1] 1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17  18 
##  [19] 19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36 
##  [37] 37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54 
##  [55] 55  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73 
##  [73] 74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91 
##  [91] 92  93  94  95  96  97  98  99  100 101 102 103 104 105 106 107 108 109
## [109] 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
## [127] 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
## [145] 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 159 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 160

Extract relevant columns, and make sure there is only 1 row per 1 participant.

# Keep only the variables you need
# (the participant ID plus the four perceived-difficulty ratings).
# NOTE(review): the object is named `diff`, the same name as base R's diff()
# function; a more specific name would be less confusing.
diff <- dat3 %>%
  select(Participant, 
         mentaleffort, 
         difficulty,
         contentplanning,
         linguisticchallenges)

# Keep only one unique row per participant
# distinct(..., .keep_all = TRUE) keeps the first row found for each
# Participant and drops the others.
# NOTE(review): dat3 has several rows per participant (the descriptives below
# report n = 636 for 159 participants), so the PCA uses only each
# participant's first row of ratings -- confirm this is intended.
diff <- distinct(diff, Participant, .keep_all = TRUE)

# Confirm there is only one row for each participant
table(diff$Participant)
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  57  58  59  60  61 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
##  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1 
## 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1

Make sure there are no missing values.

# NOTE(review): this checks the full dat3, whereas the motivation and anxiety
# sections check the extracted subset (mot, anx); consider is.na(diff) here.
colSums(is.na(dat3))
##          Participant                 Task                Genre 
##                    0                    0                    0 
##           Complexity         mentaleffort           difficulty 
##                    0                    0                    0 
##      contentplanning linguisticchallenges          MasteryGoal 
##                    0                    0                    0 
##        AvoidancegOAL      PerformanceGoal         SelfEfficacy 
##                    0                    0                    0 
##            TaskValue          Attribution         SomaticTotal 
##                    0                    0                    0 
##       AvoidanceTotal       CognitiveTotal        OSPANabsolute 
##                    0                    0                    0 
##           OSPANtotal            OSPANmath         RSPANPARTIAL 
##                    0                    0                    0 
##           RSPANTOTAL 
##                    0

No missing values.

Descriptives.

# NOTE(review): this describes all of dat3 (every row and column), not the
# one-row-per-participant `diff` subset that the PCA below is run on; the
# motivation and anxiety sections describe the analysed subset instead.
describe(dat3)
##                      vars   n  mean    sd median trimmed   mad min max range
## Participant*            1 636 80.00 45.93   80.0   80.00 59.30   1 159   158
## Task*                   2 636  2.50  1.12    2.5    2.50  1.48   1   4     3
## Genre*                  3 636  1.50  0.50    1.5    1.50  0.74   1   2     1
## Complexity*             4 636  1.50  0.50    1.5    1.50  0.74   1   2     1
## mentaleffort            5 636  6.07  1.63    6.0    6.13  1.48   1   9     8
## difficulty              6 636  5.93  1.73    6.0    6.00  1.48   1   9     8
## contentplanning         7 636  5.32  1.95    5.0    5.33  1.48   1   9     8
## linguisticchallenges    8 636  6.48  1.71    7.0    6.56  1.48   1   9     8
## MasteryGoal             9 636 22.70  5.80   22.0   22.68  5.93   7  39    32
## AvoidancegOAL          10 636 40.31  8.39   41.0   40.37  8.90  17  59    42
## PerformanceGoal        11 636 23.81  6.68   24.0   23.81  5.93   3  40    37
## SelfEfficacy           12 636 39.70  6.41   40.0   39.48  5.93  24  58    34
## TaskValue              13 636 27.87  7.96   27.0   27.64  7.41   7  50    43
## Attribution            14 636 19.35  3.34   20.0   19.48  2.97  10  30    20
## SomaticTotal           15 636 22.19  6.63   22.0   22.08  7.41  10  37    27
## AvoidanceTotal         16 636 17.19  3.98   17.0   17.12  4.45   7  29    22
## CognitiveTotal         17 636 33.93  5.10   35.0   34.13  5.93  19  44    25
## OSPANabsolute          18 636 38.31 15.54   39.0   38.79 13.34   0  75    75
## OSPANtotal             19 636 52.31 14.99   55.0   54.15 11.86   1  78    77
## OSPANmath*             20 636 15.42 10.46   16.0   15.45 14.83   1  29    28
## RSPANPARTIAL           21 636 15.94  6.27   16.0   15.78  5.93   1  36    35
## RSPANTOTAL             22 636 29.04  8.90   29.0   28.87  7.41   7  54    47
##                       skew kurtosis   se
## Participant*          0.00    -1.21 1.82
## Task*                 0.00    -1.37 0.04
## Genre*                0.00    -2.00 0.02
## Complexity*           0.00    -2.00 0.02
## mentaleffort         -0.37    -0.10 0.06
## difficulty           -0.32    -0.37 0.07
## contentplanning      -0.05    -0.75 0.08
## linguisticchallenges -0.44    -0.32 0.07
## MasteryGoal           0.11     0.35 0.23
## AvoidancegOAL        -0.13    -0.45 0.33
## PerformanceGoal      -0.07     0.29 0.26
## SelfEfficacy          0.34     0.35 0.25
## TaskValue             0.22     0.14 0.32
## Attribution          -0.23     0.77 0.13
## SomaticTotal          0.16    -0.81 0.26
## AvoidanceTotal        0.20     0.20 0.16
## CognitiveTotal       -0.37    -0.60 0.20
## OSPANabsolute        -0.28     0.06 0.62
## OSPANtotal           -1.13     1.20 0.59
## OSPANmath*           -0.15    -1.65 0.41
## RSPANPARTIAL          0.35     0.34 0.25
## RSPANTOTAL            0.14     0.32 0.35

4.2 PCA Version 1

4.2.1 Get the correlation matrix

cor.mat.diff <- cor(diff[, 2:5])

4.2.2 Get the Eigenvalues.

eig.vals.diff <- eigen(cor.mat.diff)$values

4.2.3 Plot Eigenvalues.

plot(eig.vals.diff,
     type = "b",
     main = "Scree Plot",
     xlab = "Index",
     ylab = "Eigenvalue")
abline(h = 1, lty = 2)  # Kaiser reference

The scree plot supports one component … (i.e., one circle above the cut off value)

4.3 PCA Version 2

4.3.1 Run PCA (# of components not pre-determined)

# Run the unrotated PCA on the perceived-difficulty ratings in `diff`
# (columns 2-5), the same data used for the correlation matrix above and the
# principal() call below. This line previously used mot[, 2:5], i.e. the
# motivation columns, by mistake.
# scale. = TRUE makes sure the values are standardized.
# NOTE: the printed summary and the percentages quoted right after this chunk
# were produced before this fix and need to be regenerated.
pca.difficulty <- prcomp(diff[, 2:5], scale. = TRUE)
summary(pca.difficulty) # Look at the Cumulative Proportion
## Importance of components:
##                           PC1    PC2    PC3    PC4
## Standard deviation     1.4091 0.9963 0.7730 0.6512
## Proportion of Variance 0.4964 0.2482 0.1494 0.1060
## Cumulative Proportion  0.4964 0.7446 0.8940 1.0000

PC1 + PC2 explain 77.57% of variance. Let’s go with two components because PC1 only explains 53.75% of variance.

4.3.2 Scree plot

fviz_eig(pca.difficulty, addlabels = TRUE)

The scree plot supports two components.

4.4 Run PCA with 2 Components

# Two-component PCA of the four perceived-difficulty ratings (columns 2-5 of
# diff). rotate = "oblimin" is an oblique rotation, so the components may
# correlate; scores = TRUE also returns component scores for every row.
pca.diff <- principal(
  r = diff[, 2:5],
  nfactors = 2,
  rotate = "oblimin",
  scores = TRUE
)

4.4.1 PCA Results for Perceived Task Difficulty

Loadings (hide loadings below |.40|)

print(pca.diff$loadings, cutoff = .40, sort = TRUE)
## 
## Loadings:
##                      TC1    TC2   
## mentaleffort          0.843       
## difficulty            0.919       
## linguisticchallenges  0.880       
## contentplanning              0.997
## 
##                  TC1   TC2
## SS loadings    2.331 1.004
## Proportion Var 0.583 0.251
## Cumulative Var 0.583 0.834
  • TC1 = mentaleffort + difficulty + linguisticchallenges (MDL)
  • TC2 = contentplanning (CONT)

Variance Explained

pca.diff$Vaccounted # Cumulative Variance Explained = 83.71%
##                             TC1       TC2
## SS loadings           2.3375134 1.0108791
## Proportion Var        0.5843783 0.2527198
## Cumulative Var        0.5843783 0.8370981
## Proportion Explained  0.6981002 0.3018998
## Cumulative Proportion 0.6981002 1.0000000

Extract the component scores and save as a dataframe

pca.diff.scores.df <- as.data.frame(pca.diff$scores)

Rename the components. TC1 = MDL, TC2 = CONT for now, but come up with names that best represent each component.

colnames(pca.diff.scores.df) <- c("MDL", "CONT")

Add participant IDs to the dataframe.

pca.diff.scores.df$Participant <- diff$Participant

Combine the original dataset with the component scores based on the Participant IDs.

dat3.diff.scores <- left_join(dat3, pca.diff.scores.df, by = "Participant")

With the new dataset (dat3.diff.scores), you can use MDL and CONT as new variables.