Load necessary packages.
library(readr)
library(readxl)
library(dplyr)
library(psych)
library(factoextra)
library(GPArotation)
Set the working directory.
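The chunk is not echoed here; a typical call looks like this (the path is a hypothetical placeholder):
setwd("~/my_project") # replace with your own project folder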
Load data.
dat <- read_excel("IDs_only.xlsx")
dat <- dat[1:160, 1:15] # keep the 160 participant rows and the 15 relevant columns
dat$Participant <- as.factor(dat$Participant) # Make participant IDs a factor
is.factor(dat$Participant) # Confirm it's a factor
## [1] TRUE
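The per-column missing-value counts below were presumably produced by:
colSums(is.na(dat)) # NAs per column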
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation Somatic_Anxiety
## 0 0
## Avoidance_Anxiety Cognitive_Anxiety
## 0 0
## OSPANabsolute_WM OSPANtotal_WM
## 0 0
## OSPANmath_WM RSPANpartial_WM
## 0 0
## RSPANtotal_WM
## 0
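And the per-row counts by:
rowSums(is.na(dat)) # NAs per row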
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0
No missing values.
Extract the relevant columns and make sure there is only one row per participant.
# Keep only the variables you need
mot <- dat %>%
  select(Participant,
         MasteryGoal_motivation,
         AvoidanceGoal_motivation,
         PerformanceGoal_motivation,
         SelfEfficacy_motivation,
         TaskValue_motivation,
         Attribution_motivation)
# Make sure the variables are stored as numbers.
mot$MasteryGoal_motivation <- as.numeric(mot$MasteryGoal_motivation)
is.numeric(mot$MasteryGoal_motivation)
## [1] TRUE
mot$AvoidanceGoal_motivation <- as.numeric(mot$AvoidanceGoal_motivation)
is.numeric(mot$AvoidanceGoal_motivation)
## [1] TRUE
mot$PerformanceGoal_motivation <- as.numeric(mot$PerformanceGoal_motivation)
is.numeric(mot$PerformanceGoal_motivation)
## [1] TRUE
mot$SelfEfficacy_motivation <- as.numeric(mot$SelfEfficacy_motivation)
is.numeric(mot$SelfEfficacy_motivation)
## [1] TRUE
mot$TaskValue_motivation <- as.numeric(mot$TaskValue_motivation)
is.numeric(mot$TaskValue_motivation)
## [1] TRUE
mot$Attribution_motivation <- as.numeric(mot$Attribution_motivation)
is.numeric(mot$Attribution_motivation)
## [1] TRUE
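As an aside, a more compact dplyr equivalent of the conversions above would be mot <- mot %>% mutate(across(-Participant, as.numeric)). The one-row-per-participant check below was presumably produced by a call along these lines:
table(mot$Participant) # each ID should appear exactly once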
##
## 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 113 114 115 116
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 117 118 119 12 120 121 122 123 124 125 126 127 128 129 13 130 131 132 133 134
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 136 137 138 139 14 140 141 142 143 144 145 146 147 148 149 15 150 151 152
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 153 154 155 156 157 158 159 16 160 17 18 19 2 20 21 22 23 24 25 26
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 63 64 65 66 67 68 69 7 70 71 72 73 74 75 76 77 78 79 8 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 9 90 91 92 93 94 95 96 97 98 99
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Make sure there are no missing values.
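Presumably the same per-column check as before:
colSums(is.na(mot)) # NAs per column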
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation
## 0
No missing values.
Descriptives.
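The table below was presumably generated with psych’s describe():
describe(mot)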
## vars n mean sd median trimmed mad min max
## Participant* 1 160 80.50 46.33 80.5 80.50 59.30 1 160
## MasteryGoal_motivation 2 160 22.68 5.80 22.0 22.65 5.93 7 39
## AvoidanceGoal_motivation 3 160 40.29 8.39 41.0 40.34 8.90 17 59
## PerformanceGoal_motivation 4 160 23.83 6.68 24.0 23.84 5.93 3 40
## SelfEfficacy_motivation 5 160 39.71 6.41 40.0 39.48 5.93 24 58
## TaskValue_motivation 6 160 27.86 7.96 27.0 27.63 7.41 7 50
## Attribution_motivation 7 160 19.36 3.34 20.0 19.49 2.97 10 30
## range skew kurtosis se
## Participant* 159 0.00 -1.22 3.66
## MasteryGoal_motivation 32 0.11 0.32 0.46
## AvoidanceGoal_motivation 42 -0.12 -0.46 0.66
## PerformanceGoal_motivation 37 -0.08 0.26 0.53
## SelfEfficacy_motivation 34 0.34 0.34 0.51
## TaskValue_motivation 43 0.22 0.13 0.63
## Attribution_motivation 20 -0.24 0.75 0.26
pca.motivation <- prcomp(mot[, 2:7], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.motivation) # Look at the Cumulative Proportion
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.6583 1.0405 0.9392 0.7749 0.66250 0.49621
## Proportion of Variance 0.4583 0.1804 0.1470 0.1001 0.07315 0.04104
## Cumulative Proportion 0.4583 0.6387 0.7857 0.8858 0.95896 1.00000
PC1 + PC2 explain 63.87% of the variance. Let’s go with two components.
pca.mot <- principal(mot[, 2:7], # dataset
                     # number of components
                     nfactors = 2,
                     # rotation method is oblique (let the components correlate)
                     rotate = "oblimin",
                     # add component scores
                     scores = TRUE)
Loadings (loadings below |.40| are hidden).
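The table below was presumably printed with something like this (the cutoff and sort arguments are assumptions based on the output):
print(pca.mot$loadings, cutoff = .40, sort = TRUE)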
##
## Loadings:
## TC1 TC2
## MasteryGoal_motivation 0.756
## AvoidanceGoal_motivation -0.846
## TaskValue_motivation 0.814
## PerformanceGoal_motivation 0.528
## SelfEfficacy_motivation 0.722
## Attribution_motivation 0.691
##
## TC1 TC2
## SS loadings 2.152 1.522
## Proportion Var 0.359 0.254
## Cumulative Var 0.359 0.612
Variance explained.
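Presumably from the Vaccounted element of the principal() output:
pca.mot$Vaccounted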
## TC1 TC2
## SS loadings 2.2311294 1.6012376
## Proportion Var 0.3718549 0.2668729
## Cumulative Var 0.3718549 0.6387278
## Proportion Explained 0.5821805 0.4178195
## Cumulative Proportion 0.5821805 1.0000000
Extract the component scores and save them as a dataframe.
Rename the components: TC1 = MotEng for “Motivation Engagement” and TC2 = AchievEng for “Achievement Engagement.”
Add participant IDs to the dataframe.
Combine the original dataset with the component scores based on the Participant IDs.
With the new dataset (dat2_with_scores), you can use MotEng and AchievEng as new variables.
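A sketch of the four steps above (mot.scores is an assumed name; dat2_with_scores is the name used here, and left_join() is one reasonable way to do the merge):
mot.scores <- as.data.frame(pca.mot$scores) # extract the component scores
colnames(mot.scores) <- c("MotEng", "AchievEng") # rename TC1 and TC2
mot.scores$Participant <- mot$Participant # add participant IDs
dat2_with_scores <- left_join(dat, mot.scores, by = "Participant") # combine by ID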
Extract the relevant columns and make sure there is only one row per participant.
# Keep only the variables you need
anx <- dat %>%
  select(Participant,
         Somatic_Anxiety,
         Avoidance_Anxiety,
         Cognitive_Anxiety)
# Make sure the variables are stored as numbers.
anx$Somatic_Anxiety <- as.numeric(anx$Somatic_Anxiety)
is.numeric(anx$Somatic_Anxiety)
## [1] TRUE
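The remaining two scales were presumably converted the same way (only the TRUE confirmations appear below):
anx$Avoidance_Anxiety <- as.numeric(anx$Avoidance_Anxiety)
is.numeric(anx$Avoidance_Anxiety)
anx$Cognitive_Anxiety <- as.numeric(anx$Cognitive_Anxiety)
is.numeric(anx$Cognitive_Anxiety)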
## [1] TRUE
## [1] TRUE
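And the one-row-per-participant check, mirroring the motivation section:
table(anx$Participant) # each ID should appear exactly once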
##
## 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 113 114 115 116
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 117 118 119 12 120 121 122 123 124 125 126 127 128 129 13 130 131 132 133 134
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 135 136 137 138 139 14 140 141 142 143 144 145 146 147 148 149 15 150 151 152
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 153 154 155 156 157 158 159 16 160 17 18 19 2 20 21 22 23 24 25 26
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 27 28 29 3 30 31 32 33 34 35 36 37 38 39 4 40 41 42 43 44
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 45 46 47 48 49 5 50 51 52 53 54 55 56 57 58 59 6 60 61 62
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 63 64 65 66 67 68 69 7 70 71 72 73 74 75 76 77 78 79 8 80
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 81 82 83 84 85 86 87 88 89 9 90 91 92 93 94 95 96 97 98 99
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Make sure there are no missing values.
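Presumably:
colSums(is.na(anx)) # NAs per column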
## Participant Somatic_Anxiety Avoidance_Anxiety Cognitive_Anxiety
## 0 0 0 0
No missing values.
Descriptives.
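Presumably again with psych’s describe():
describe(anx)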
## vars n mean sd median trimmed mad min max range skew
## Participant* 1 160 80.50 46.33 80.5 80.50 59.30 1 160 159 0.00
## Somatic_Anxiety 2 160 22.19 6.62 21.5 22.08 8.15 10 37 27 0.16
## Avoidance_Anxiety 3 160 17.20 3.98 17.0 17.13 4.45 7 29 22 0.19
## Cognitive_Anxiety 4 160 33.92 5.09 35.0 34.12 5.93 19 44 25 -0.36
## kurtosis se
## Participant* -1.22 3.66
## Somatic_Anxiety -0.82 0.52
## Avoidance_Anxiety 0.18 0.31
## Cognitive_Anxiety -0.61 0.40
pca.anxiety <- prcomp(anx[, 2:4], scale. = TRUE) # scale. = TRUE makes sure the values are standardized
summary(pca.anxiety) # Look at the Cumulative Proportion
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.2975 0.9132 0.6946
## Proportion of Variance 0.5612 0.2780 0.1608
## Cumulative Proportion 0.5612 0.8392 1.0000
PC1 + PC2 explain 83.92% of the variance. Let’s go with two components, since PC1 alone explains only 56.12% of the variance.
pca.anx <- principal(anx[, 2:4], # dataset
                     # number of components
                     nfactors = 2,
                     # rotation method is oblique (let the components correlate)
                     rotate = "oblimin",
                     # add component scores
                     scores = TRUE)
Loadings (loadings below |.40| are hidden).
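Presumably printed the same way as the motivation loadings:
print(pca.anx$loadings, cutoff = .40, sort = TRUE)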
##
## Loadings:
## TC1 TC2
## Somatic_Anxiety 0.932
## Avoidance_Anxiety 0.741
## Cognitive_Anxiety 0.979
##
## TC1 TC2
## SS loadings 1.418 1.055
## Proportion Var 0.473 0.352
## Cumulative Var 0.473 0.825
Variance explained.
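Presumably:
pca.anx$Vaccounted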
## TC1 TC2
## SS loadings 1.4403597 1.0772092
## Proportion Var 0.4801199 0.3590697
## Cumulative Var 0.4801199 0.8391896
## Proportion Explained 0.5721232 0.4278768
## Cumulative Proportion 0.5721232 1.0000000
Extract the component scores and save them as a dataframe.
Rename the components: TC1 = StAnx and TC2 = CogAnx for now, but come up with names that best represent each component.
Add participant IDs to the dataframe.
Combine the original dataset with the component scores based on the Participant IDs.
With the new dataset, you can use “StAnx” and “CogAnx” as new variables.
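A sketch of the steps above, parallel to the motivation section (anx.scores is an assumed name):
anx.scores <- as.data.frame(pca.anx$scores) # extract the component scores
colnames(anx.scores) <- c("StAnx", "CogAnx") # rename TC1 and TC2
anx.scores$Participant <- anx$Participant # add participant IDs
dat2_with_scores <- left_join(dat2_with_scores, anx.scores, by = "Participant")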
Load data.
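The chunk is not echoed; presumably another read_excel() call along these lines (the filename is a hypothetical placeholder):
dat2 <- read_excel("outcomes.xlsx") # hypothetical filename for the outcome data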
Make participant IDs a factor.
dat2$Participant <- as.factor(dat2$Participant) # Make participant IDs a factor
is.factor(dat2$Participant) # Confirm it's a factor
## [1] TRUE
Make sure there are exactly 4 rows per participant.
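Presumably checked with table() as before:
table(dat2$Participant) # each ID should appear exactly 4 times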
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
Create a column with the composite score (i.e., add up the four measures and divide the sum by 4).
dat2$PTD_composite <- (dat2$mentaleffort + dat2$difficulty + dat2$contentplanning + dat2$linguisticchallenges)/4
How does the distribution look?
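The distribution was presumably inspected with a histogram along these lines (the original figure is not reproduced here):
hist(dat2$PTD_composite, main = "PTD composite", xlab = "Composite score")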
Kind of normal … but is it different from a normal distribution? Run the Shapiro-Wilk test.
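The call that presumably produced the output below:
shapiro.test(dat2$PTD_composite)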
##
## Shapiro-Wilk normality test
##
## data: dat2$PTD_composite
## W = 0.98977, p-value = 0.0001981
The Shapiro-Wilk test comes out significant … meaning that, statistically speaking, the composite score is not normally distributed.
Combine the ID dataset with the component scores and the outcome-only dataset.
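A sketch of the merge (dat_complete is the name used in the filtering code further down; left_join() is an assumption), followed by the per-column missingness check:
dat_complete <- left_join(dat2_with_scores, dat2, by = "Participant")
colSums(is.na(dat_complete)) # NAs per column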
## Participant MasteryGoal_motivation
## 0 0
## AvoidanceGoal_motivation PerformanceGoal_motivation
## 0 0
## SelfEfficacy_motivation TaskValue_motivation
## 0 0
## Attribution_motivation Somatic_Anxiety
## 0 0
## Avoidance_Anxiety Cognitive_Anxiety
## 0 0
## OSPANabsolute_WM OSPANtotal_WM
## 0 0
## OSPANmath_WM RSPANpartial_WM
## 0 0
## RSPANtotal_WM MotEng
## 0 0
## AchievEng StAnx
## 0 0
## CogAnx Genre
## 0 0
## Complexity Task
## 0 0
## Subordination Coordination
## 0 0
## AccuracyGender AccuracyNumber
## 0 0
## AccuracyTense AccuracyAspect
## 1 0
## LexicalDensity LexicalDiversity
## 0 0
## FluencySyllablesperMinute mentaleffort
## 0 1
## difficulty contentplanning
## 1 1
## linguisticchallenges PTD_composite
## 1 1
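And presumably the per-row check:
rowSums(is.na(dat_complete)) # NAs per row (640 rows = 160 participants x 4)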
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0
# Row 223 (Participant 56) and row 372 (Participant 93) contain the missing values,
# so drop those two participants
dat_complete_no_missing_data <- dat_complete %>%
  filter(Participant != "56") %>%
  filter(Participant != "93")
# Drop the levels that are not being used
dat_complete_no_missing_data$Participant <- droplevels(dat_complete_no_missing_data$Participant)
# Confirm the number of unique participant IDs
unique(dat_complete_no_missing_data$Participant) # Should be 158 levels
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
## [73] 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
## [91] 92 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
## [109] 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
## [127] 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
## [145] 147 148 149 150 151 152 153 154 155 156 157 158 159 160
## 158 Levels: 1 10 100 101 102 103 104 105 106 107 108 109 11 110 111 112 ... 99
Now that I have the complete dataset, I will save it as a CSV file so I can use it for the analysis.
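Presumably with readr’s write_csv():
write_csv(dat_complete_no_missing_data, "dataset_complete.csv")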
Check the working directory. There should be a CSV file named “dataset_complete.csv”.