Load necessary packages.
library(readr)
library(readxl)
library(dplyr)
library(psych)
library(factoextra)
library(GPArotation)
Set the working directory.
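The chunk is not echoed here; a typical call looks like this (the path is a hypothetical placeholder):
setwd("~/my_project") # replace with your own project folder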
Load data.
dat <- read_excel("IDs_only.xlsx")
dat <- dat[1:160, 1:15] # keep the 160 participant rows and the 15 relevant columns
dat$Participant <- as.factor(dat$Participant) # Make participant IDs a factor
is.factor(dat$Participant) # Confirm it's a factor
## [1] TRUE
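The per-column missing-value counts below were presumably produced by:
colSums(is.na(dat)) # NAs per column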
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation Somatic_Anxiety
## 0 0
## Avoidance_Anxiety Cognitive_Anxiety
## 0 0
## OSPANabsolute_WM OSPANtotal_WM
## 0 0
## OSPANmath_WM RSPANpartial_WM
## 0 0
## RSPANtotal_WM
## 0
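And the per-row counts by:
rowSums(is.na(dat)) # NAs per row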
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0
No missing values.
Extract the relevant columns and make sure there is only one row per participant.
# Keep only the variables you need
mot <- dat %>%
  select(Participant,
         MasteryGoal_motivation,
         AvoidanceGoal_motivation,
         PerformanceGoal_motivation,
         SelfEfficacy_motivation,
         TaskValue_motivation,
         Attribution_motivation)
# Make sure the variables are stored as numbers.
mot$MasteryGoal_motivation <- as.numeric(mot$MasteryGoal_motivation)
is.numeric(mot$MasteryGoal_motivation)
## [1] TRUE
mot$AvoidanceGoal_motivation <- as.numeric(mot$AvoidanceGoal_motivation)
is.numeric(mot$AvoidanceGoal_motivation)
## [1] TRUE
mot$PerformanceGoal_motivation <- as.numeric(mot$PerformanceGoal_motivation)
is.numeric(mot$PerformanceGoal_motivation)
## [1] TRUE
mot$SelfEfficacy_motivation <- as.numeric(mot$SelfEfficacy_motivation)
is.numeric(mot$SelfEfficacy_motivation)
## [1] TRUE
mot$TaskValue_motivation <- as.numeric(mot$TaskValue_motivation)
is.numeric(mot$TaskValue_motivation)
## [1] TRUE
mot$Attribution_motivation <- as.numeric(mot$Attribution_motivation)
is.numeric(mot$Attribution_motivation)
## [1] TRUE
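As an aside, a more compact dplyr equivalent of the conversions above would be mot <- mot %>% mutate(across(-Participant, as.numeric)). The one-row-per-participant check below was presumably produced by a call along these lines:
table(mot$Participant) # each ID should appear exactly once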
##
## 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 113 114 115 116
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 117 118 119 12 120 121 122 123 124 125 126 127 128 129 13 130 131 132 133 134
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 136 137 138 139 14 140 141 142 143 144 145 146 147 148 149 15 150 151 152
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 153 154 155 156 157 158 159 16 160 17 18 19 2 20 21 22 23 24 25 26
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 63 64 65 66 67 68 69 7 70 71 72 73 74 75 76 77 78 79 8 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 9 90 91 92 93 94 95 96 97 98 99
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Make sure there are no missing values.
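Presumably the same per-column check as before:
colSums(is.na(mot)) # NAs per column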
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation
## 0
No missing values.
Descriptives.
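The table below was presumably generated with psych’s describe():
describe(mot)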
## vars n mean sd median trimmed mad min max
## Participant* 1 160 80.50 46.33 80.5 80.50 59.30 1 160
## MasteryGoal_motivation 2 160 22.68 5.80 22.0 22.65 5.93 7 39
## AvoidanceGoal_motivation 3 160 40.29 8.39 41.0 40.34 8.90 17 59
## PerformanceGoal_motivation 4 160 23.83 6.68 24.0 23.84 5.93 3 40
## SelfEfficacy_motivation 5 160 39.71 6.41 40.0 39.48 5.93 24 58
## TaskValue_motivation 6 160 27.86 7.96 27.0 27.63 7.41 7 50
## Attribution_motivation 7 160 19.36 3.34 20.0 19.49 2.97 10 30
## range skew kurtosis se
## Participant* 159 0.00 -1.22 3.66
## MasteryGoal_motivation 32 0.11 0.32 0.46
## AvoidanceGoal_motivation 42 -0.12 -0.46 0.66
## PerformanceGoal_motivation 37 -0.08 0.26 0.53
## SelfEfficacy_motivation 34 0.34 0.34 0.51
## TaskValue_motivation 43 0.22 0.13 0.63
## Attribution_motivation 20 -0.24 0.75 0.26
pca.motivation <- prcomp(mot[, 2:7], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.motivation) # Look at the Cumulative Proportion
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.6583 1.0405 0.9392 0.7749 0.66250 0.49621
## Proportion of Variance 0.4583 0.1804 0.1470 0.1001 0.07315 0.04104
## Cumulative Proportion 0.4583 0.6387 0.7857 0.8858 0.95896 1.00000
PC1 + PC2 explain 63.87% of the variance. Let’s go with two components.
pca.mot <- principal(mot[, 2:7], # dataset
                     # number of components
                     nfactors = 2,
                     # rotation method is oblique (let the components correlate)
                     rotate = "oblimin",
                     # add component scores
                     scores = TRUE)
Loadings (loadings below |.40| are hidden).
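The table below was presumably printed with something like this (the cutoff and sort arguments are assumptions based on the output):
print(pca.mot$loadings, cutoff = .40, sort = TRUE)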
##
## Loadings:
## TC1 TC2
## MasteryGoal_motivation 0.756
## AvoidanceGoal_motivation -0.846
## TaskValue_motivation 0.814
## PerformanceGoal_motivation 0.528
## SelfEfficacy_motivation 0.722
## Attribution_motivation 0.691
##
## TC1 TC2
## SS loadings 2.152 1.522
## Proportion Var 0.359 0.254
## Cumulative Var 0.359 0.612
Variance explained.
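Presumably from the Vaccounted element of the principal() output:
pca.mot$Vaccounted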
## TC1 TC2
## SS loadings 2.2311294 1.6012376
## Proportion Var 0.3718549 0.2668729
## Cumulative Var 0.3718549 0.6387278
## Proportion Explained 0.5821805 0.4178195
## Cumulative Proportion 0.5821805 1.0000000
Extract the component scores and save them as a dataframe.
Rename the components: TC1 = MotEng for “Motivation Engagement” and TC2 = AchievEng for “Achievement Engagement.”
Add participant IDs to the dataframe.
Combine the original dataset with the component scores based on the Participant IDs.
With the new dataset (dat2_with_scores), you can use MotEng and AchievEng as new variables.
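A sketch of the four steps above (mot.scores is an assumed name; dat2_with_scores is the name used here, and left_join() is one reasonable way to do the merge):
mot.scores <- as.data.frame(pca.mot$scores) # extract the component scores
colnames(mot.scores) <- c("MotEng", "AchievEng") # rename TC1 and TC2
mot.scores$Participant <- mot$Participant # add participant IDs
dat2_with_scores <- left_join(dat, mot.scores, by = "Participant") # combine by ID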
Extract the relevant columns and make sure there is only one row per participant.
# Keep only the variables you need
anx <- dat %>%
  select(Participant,
         Somatic_Anxiety,
         Avoidance_Anxiety,
         Cognitive_Anxiety)
# Make sure the variables are stored as numbers.
anx$Somatic_Anxiety <- as.numeric(anx$Somatic_Anxiety)
is.numeric(anx$Somatic_Anxiety)
## [1] TRUE
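The remaining two scales were presumably converted the same way (only the TRUE confirmations appear below):
anx$Avoidance_Anxiety <- as.numeric(anx$Avoidance_Anxiety)
is.numeric(anx$Avoidance_Anxiety)
anx$Cognitive_Anxiety <- as.numeric(anx$Cognitive_Anxiety)
is.numeric(anx$Cognitive_Anxiety)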
## [1] TRUE
## [1] TRUE
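And the one-row-per-participant check, mirroring the motivation section:
table(anx$Participant) # each ID should appear exactly once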
##
## 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 113 114 115 116
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 117 118 119 12 120 121 122 123 124 125 126 127 128 129 13 130 131 132 133 134
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 136 137 138 139 14 140 141 142 143 144 145 146 147 148 149 15 150 151 152
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 153 154 155 156 157 158 159 16 160 17 18 19 2 20 21 22 23 24 25 26
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 63 64 65 66 67 68 69 7 70 71 72 73 74 75 76 77 78 79 8 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 9 90 91 92 93 94 95 96 97 98 99
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Make sure there are no missing values.
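Presumably:
colSums(is.na(anx)) # NAs per column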
## Participant Somatic_Anxiety Avoidance_Anxiety Cognitive_Anxiety
## 0 0 0 0
No missing values.
Descriptives.
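Presumably again with psych’s describe():
describe(anx)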
## vars n mean sd median trimmed mad min max range skew
## Participant* 1 160 80.50 46.33 80.5 80.50 59.30 1 160 159 0.00
## Somatic_Anxiety 2 160 22.19 6.62 21.5 22.08 8.15 10 37 27 0.16
## Avoidance_Anxiety 3 160 17.20 3.98 17.0 17.13 4.45 7 29 22 0.19
## Cognitive_Anxiety 4 160 33.92 5.09 35.0 34.12 5.93 19 44 25 -0.36
## kurtosis se
## Participant* -1.22 3.66
## Somatic_Anxiety -0.82 0.52
## Avoidance_Anxiety 0.18 0.31
## Cognitive_Anxiety -0.61 0.40
pca.anxiety <- prcomp(anx[, 2:4], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.anxiety) # Look at the Cumulative Proportion
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.2975 0.9132 0.6946
## Proportion of Variance 0.5612 0.2780 0.1608
## Cumulative Proportion 0.5612 0.8392 1.0000
PC1 + PC2 explain 83.92% of the variance. Let’s go with two components, since PC1 alone explains only 56.12% of the variance.
pca.anx <- principal(anx[, 2:4], # dataset
                     # number of components
                     nfactors = 2,
                     # rotation method is oblique (let the components correlate)
                     rotate = "oblimin",
                     # add component scores
                     scores = TRUE)
Loadings (loadings below |.40| are hidden).
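Presumably printed the same way as the motivation loadings:
print(pca.anx$loadings, cutoff = .40, sort = TRUE)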
##
## Loadings:
## TC1 TC2
## Somatic_Anxiety 0.932
## Avoidance_Anxiety 0.741
## Cognitive_Anxiety 0.979
##
## TC1 TC2
## SS loadings 1.418 1.055
## Proportion Var 0.473 0.352
## Cumulative Var 0.473 0.825
Variance explained.
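Presumably:
pca.anx$Vaccounted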
## TC1 TC2
## SS loadings 1.4403597 1.0772092
## Proportion Var 0.4801199 0.3590697
## Cumulative Var 0.4801199 0.8391896
## Proportion Explained 0.5721232 0.4278768
## Cumulative Proportion 0.5721232 1.0000000
Extract the component scores and save them as a dataframe.
Rename the components: TC1 = StAnx and TC2 = CogAnx for now, but come up with names that best represent each component.
Add participant IDs to the dataframe.
Combine the original dataset with the component scores based on the Participant IDs.
With the new dataset, you can use “StAnx” and “CogAnx” as new variables.
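A sketch of the steps above, parallel to the motivation section (anx.scores is an assumed name):
anx.scores <- as.data.frame(pca.anx$scores) # extract the component scores
colnames(anx.scores) <- c("StAnx", "CogAnx") # rename TC1 and TC2
anx.scores$Participant <- anx$Participant # add participant IDs
dat2_with_scores <- left_join(dat2_with_scores, anx.scores, by = "Participant")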
Load data.
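The chunk is not echoed; presumably another read_excel() call along these lines (the filename is a hypothetical placeholder):
dat2 <- read_excel("outcomes.xlsx") # hypothetical filename for the outcome data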
Make participant IDs a factor.
dat2$Participant <- as.factor(dat2$Participant) # Make participant IDs a factor
is.factor(dat2$Participant) # Confirm it's a factor
## [1] TRUE
Make sure there are exactly 4 rows per participant.
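Presumably checked with table() as before:
table(dat2$Participant) # each ID should appear exactly 4 times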
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
Create a column with the composite score (i.e., add up the four measures and divide the sum by 4).
dat2$PTD_composite <- (dat2$mentaleffort + dat2$difficulty + dat2$contentplanning + dat2$linguisticchallenges)/4
How does the distribution look?
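The distribution was presumably inspected with a histogram along these lines (the original figure is not reproduced here):
hist(dat2$PTD_composite, main = "PTD composite", xlab = "Composite score")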
Kind of normal … but is it different from a normal distribution? Run the Shapiro-Wilk test.
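The call that presumably produced the output below:
shapiro.test(dat2$PTD_composite)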
##
## Shapiro-Wilk normality test
##
## data: dat2$PTD_composite
## W = 0.98977, p-value = 0.0001981
The Shapiro-Wilk test comes out significant … meaning that, statistically speaking, the composite score is not normally distributed.
Combine the ID dataset with the component scores and the outcome-only dataset.
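A sketch of the merge (dat_complete is the name used in the filtering code further down; left_join() is an assumption), followed by the per-column missingness check:
dat_complete <- left_join(dat2_with_scores, dat2, by = "Participant")
colSums(is.na(dat_complete)) # NAs per column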
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation Somatic_Anxiety
## 0 0
## Avoidance_Anxiety Cognitive_Anxiety
## 0 0
## OSPANabsolute_WM OSPANtotal_WM
## 0 0
## OSPANmath_WM RSPANpartial_WM
## 0 0
## RSPANtotal_WM MotEng
## 0 0
## AchievEng StAnx
## 0 0
## CogAnx Genre
## 0 0
## Complexity Task
## 0 0
## Subordination Coordination
## 0 0
## AccuracyGender AccuracyNumber
## 0 0
## AccuracyTense AccuracyAspect
## 1 0
## LexicalDensity LexicalDiversity
## 0 0
## FluencySyllablesperMinute mentaleffort
## 0 1
## difficulty contentplanning
## 1 1
## linguisticchallenges PTD_composite
## 1 1
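And presumably the per-row check:
rowSums(is.na(dat_complete)) # NAs per row (640 rows = 160 participants x 4)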
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0
# Row 223 (Participant 56) and row 372 (Participant 93) contain the missing values,
# so drop those two participants
dat_complete_no_missing_data <- dat_complete %>%
  filter(Participant != "56") %>%
  filter(Participant != "93")
# Drop the levels that are not being used
dat_complete_no_missing_data$Participant <- droplevels(dat_complete_no_missing_data$Participant)
# Confirm the number of unique participant IDs
unique(dat_complete_no_missing_data$Participant) # Should be 158 levels
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
## [73] 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
## [91] 92 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
## [109] 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
## [127] 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
## [145] 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 158 Levels: 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 ... 99
Now that I have the complete dataset, I will save it as a CSV file so I can use it for the analysis.
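Presumably with readr’s write_csv():
write_csv(dat_complete_no_missing_data, "dataset_complete.csv")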
Check the working directory. There should be a CSV file named “dataset_complete.csv”.