# Load the survey responses from a comma-separated file with a header row.
mydata <- read.table("~/Desktop/SP.csv",
                     header = TRUE, sep = ",", dec = ".")
# Add a running respondent ID. seq_len() returns an empty vector for a
# 0-row data frame, whereas 1:nrow() would give c(1, 0) and make the
# assignment fail.
mydata$ID <- seq_len(nrow(mydata))
head(mydata)
##   Gender Age
## 1   Male  16
## 2   Male  16
## 3   Male  18
## 4   Male  21
## 5 Female  21
## 6   Male  21
##   I.learn.better.by.reading.what.the.teacher.writes.on.the.chalkboard.
## 1                                                                    3
## 2                                                                    5
## 3                                                                    3
## 4                                                                    1
## 5                                                                    4
## 6                                                                    2
##   I.learn.better.by.reading.than.by.listening.to.someone.
## 1                                                       3
## 2                                                       4
## 3                                                       3
## 4                                                       4
## 5                                                       5
## 6                                                       3
##   When.the.teacher.tells.me.the.instructions.I.understand.better
## 1                                                              4
## 2                                                              3
## 3                                                              2
## 4                                                              2
## 5                                                              4
## 6                                                              3
##   I.remember.things.I.have.heard.in.class.better.than.things.I.have.read.
## 1                                                                       3
## 2                                                                       4
## 3                                                                       3
## 4                                                                       2
## 5                                                                       3
## 6                                                                       4
##   I.prefer.to.learn.by.doing.something.in.class.
## 1                                              4
## 2                                              3
## 3                                              3
## 4                                              4
## 5                                              5
## 6                                              1
##   When.I.do.things.in.class..I.learn.better. ID
## 1                                          2  1
## 2                                          4  2
## 3                                          2  3
## 4                                          4  4
## 5                                          5  5
## 6                                          2  6

Data description

Data manipulation

# Replace the long questionnaire column headers with short names,
# assigned by position in a single step.
colnames(mydata)[1:9] <- c(
  "gender", "age",
  "TReading", "Reading",
  "TListening", "Listening",
  "Activity", "Participation",
  "id"
)

head(mydata)
##   gender age TReading Reading TListening Listening Activity Participation id
## 1   Male  16        3       3          4         3        4             2  1
## 2   Male  16        5       4          3         4        3             4  2
## 3   Male  18        3       3          2         3        3             2  3
## 4   Male  21        1       4          2         2        4             4  4
## 5 Female  21        4       5          4         3        5             5  5
## 6   Male  21        2       3          3         4        1             2  6
# Recode gender as a factor with abbreviated labels
# ("Female" -> "F", "Male" -> "M") for convenience.
mydata$gender <- factor(
  mydata$gender,
  levels = c("Female", "Male"),
  labels = c("F", "M")
)

Descriptive statistics

library(psych)
# Descriptive statistics for the questionnaire items only;
# gender, age and id are left out.
psych::describe(
  mydata[, setdiff(colnames(mydata), c("gender", "age", "id"))]
)
##               vars   n mean   sd median trimmed  mad min max range  skew
## TReading         1 150 3.29 0.98      3    3.28 1.48   1   5     4 -0.21
## Reading          2 150 3.30 0.98      3    3.30 1.48   1   5     4 -0.16
## TListening       3 150 3.37 0.95      3    3.37 1.48   1   5     4 -0.24
## Listening        4 150 3.19 0.95      3    3.19 1.48   1   5     4 -0.11
## Activity         5 150 3.31 1.04      3    3.29 1.48   1   5     4  0.12
## Participation    6 150 3.46 0.98      4    3.48 1.48   1   5     4 -0.29
##               kurtosis   se
## TReading         -0.49 0.08
## Reading          -0.26 0.08
## TListening       -0.55 0.08
## Listening        -0.41 0.08
## Activity         -0.65 0.08
## Participation    -0.56 0.08

Research question: How can I use principal component (PC) analysis to combine different study preferences and summarize them in terms of passive and active learning?

# Keep only the questionnaire items (everything except gender, age and id).
mydata_PCA <- mydata[, setdiff(colnames(mydata), c("gender", "age", "id"))]
# Correlation matrix of the items, printed to three decimals.
R <- cor(mydata_PCA)
round(R, digits = 3)
##               TReading Reading TListening Listening Activity Participation
## TReading         1.000   0.154      0.245     0.157   -0.021         0.106
## Reading          0.154   1.000      0.045     0.132   -0.051        -0.172
## TListening       0.245   0.045      1.000     0.292    0.325         0.253
## Listening        0.157   0.132      0.292     1.000    0.110         0.287
## Activity        -0.021  -0.051      0.325     0.110    1.000         0.369
## Participation    0.106  -0.172      0.253     0.287    0.369         1.000
library(psych)

# Plot the correlation matrix.
corPlot(R)

# Bartlett's test on R, using the number of rows in mydata as sample size.
cortest.bartlett(R, n = nrow(mydata))
## $chisq
## [1] 88.97409
## 
## $p.value
## [1] 1.540501e-12
## 
## $df
## [1] 15
# Determinant of the correlation matrix R.
det(R)
## [1] 0.5440486
library(psych)
# Kaiser-Meyer-Olkin factor adequacy, overall and per item.
KMO(R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA =  0.58
## MSA for each item = 
##      TReading       Reading    TListening     Listening      Activity 
##          0.54          0.43          0.64          0.60          0.56 
## Participation 
##          0.58
library(FactoMineR)
# First pass: PCA on unit-scaled variables, with plotting switched off.
components <- PCA(mydata_PCA, scale.unit = TRUE, graph = FALSE)

library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Eigenvalue, variance share and cumulative variance share per component.
get_eigenvalue(components)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  1.8928531        31.547551                    31.54755
## Dim.2  1.2829543        21.382571                    52.93012
## Dim.3  0.8652761        14.421268                    67.35139
## Dim.4  0.8193070        13.655116                    81.00651
## Dim.5  0.6628751        11.047919                    92.05443
## Dim.6  0.4767344         7.945574                   100.00000
library(factoextra)

# Scree plot of the eigenvalues, with value labels added.
fviz_eig(
  components,
  choice = "eigenvalue",
  addlabels = TRUE,
  main = "Scree plot",
  xlab = "Principal component",
  ylab = "Eigenvalue"
)

library(psych)
# Parallel analysis restricted to principal components (fa = "pc"),
# with sim = FALSE.
fa.parallel(mydata_PCA, fa = "pc", sim = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  2
library(FactoMineR)
# Refit the PCA keeping two components, then print the result overview.
components <- PCA(mydata_PCA, scale.unit = TRUE, ncp = 2, graph = FALSE)
components
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 150 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# Correlations between the variables and the retained dimensions.
components[["var"]][["cor"]]
##                    Dim.1      Dim.2
## TReading      0.37661145  0.5728243
## Reading       0.02134114  0.7564329
## TListening    0.72332496  0.1267760
## Listening     0.60510406  0.2821772
## Activity      0.61530888 -0.3955668
## Participation 0.69469887 -0.3612016
# Contributions of the variables to each retained dimension.
components[["var"]][["contrib"]]
##                     Dim.1     Dim.2
## TReading       7.49324861 25.575942
## Reading        0.02406127 44.599468
## TListening    27.64076062  1.252746
## Listening     19.34386358  6.206299
## Activity      20.00181756 12.196311
## Participation 25.49624836 10.169234
# Plot the variables of the fitted PCA (repel = TRUE requested for labels).
fviz_pca_var(components, repel = TRUE)

# Biplot of the fitted PCA (repel = TRUE requested for labels).
fviz_pca_biplot(components, repel = TRUE)

Conclusion

I used PC analysis and, given all the tests and results, I can conclude that the variables can be summarized into two components: