# Exploration of the dataset on the psychometric properties of the CPRD

library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
# Import the questionnaire workbook into CPRD. The path is relative, so the
# script has to be run from the directory that contains CPRD.xlsx.
CPRD <- read_excel("CPRD.xlsx")
# Preview the first rows to confirm the columns and their types were read as
# expected (printed as a tibble in the output below).
head(CPRD)
## # A tibble: 6 × 61
##   Sex    `Routine position` `Socioeconomic s… `Level of educat… `Years experien…
##   <chr>  <chr>              <chr>             <chr>             <chr>           
## 1 Female Spotter            Medio-bajo        12-15             Bastante        
## 2 Female Spotter            Bajo              9-11              Poca            
## 3 Female Flyer              Medio             9-11              Poca            
## 4 Female Flyer              Bajo-bajo         9-11              Poca            
## 5 Female Base               Bajo              9-11              Poca            
## 6 Female Spotter            Bajo-bajo         9-11              Poca            
## # … with 56 more variables: Age <dbl>, CE1 <dbl>, CE3 <dbl>, CE6 <dbl>,
## #   CE8 <dbl>, CE10 <dbl>, CE12 <dbl>, CE13 <dbl>, CE14 <dbl>, CE17 <dbl>,
## #   CE19 <dbl>, CE20 <dbl>, CE21 <dbl>, CE24 <dbl>, CE26 <dbl>, CE30 <dbl>,
## #   CE32 <dbl>, CE36 <dbl>, CE41 <dbl>, CE43 <dbl>, CE54 <dbl>, IER9 <dbl>,
## #   IER16 <dbl>, IER28 <dbl>, IER34 <dbl>, IER35 <dbl>, IER42 <dbl>,
## #   IER44 <dbl>, IER46 <dbl>, IER47 <dbl>, IER51 <dbl>, IER52 <dbl>,
## #   IER53 <dbl>, M4 <dbl>, M15 <dbl>, M29 <dbl>, M31 <dbl>, M33 <dbl>, …
# Also inspect the last rows, to check how the end of the sheet was read.
tail(CPRD)
## # A tibble: 6 × 61
##   Sex    `Routine position` `Socioeconomic s… `Level of educat… `Years experien…
##   <chr>  <chr>              <chr>             <chr>             <chr>           
## 1 Male   Spotter            Bajo              9-11              Bastante        
## 2 Male   Base               Medio             16-20             Bastante        
## 3 Male   Base               Medio-bajo        9-11              Bastante        
## 4 Male   Base               Bajo-bajo         9-11              Bastante        
## 5 Female Flyer              Bajo              9-11              Bastante        
## 6 Male   Base               Medio-bajo        12-15             Bastante        
## # … with 56 more variables: Age <dbl>, CE1 <dbl>, CE3 <dbl>, CE6 <dbl>,
## #   CE8 <dbl>, CE10 <dbl>, CE12 <dbl>, CE13 <dbl>, CE14 <dbl>, CE17 <dbl>,
## #   CE19 <dbl>, CE20 <dbl>, CE21 <dbl>, CE24 <dbl>, CE26 <dbl>, CE30 <dbl>,
## #   CE32 <dbl>, CE36 <dbl>, CE41 <dbl>, CE43 <dbl>, CE54 <dbl>, IER9 <dbl>,
## #   IER16 <dbl>, IER28 <dbl>, IER34 <dbl>, IER35 <dbl>, IER42 <dbl>,
## #   IER44 <dbl>, IER46 <dbl>, IER47 <dbl>, IER51 <dbl>, IER52 <dbl>,
## #   IER53 <dbl>, M4 <dbl>, M15 <dbl>, M29 <dbl>, M31 <dbl>, M33 <dbl>, …
# Size of the data set, reported as c(rows, columns).
dim(CPRD)
## [1] 207  61

# Exploratory data analysis ## Outlier detection

# Total number of missing cells across every column of CPRD; 0 means the data
# set is complete.
sum(is.na(CPRD))
## [1] 0
# Per-column descriptives. For the numeric columns the Min./Max. rows give the
# observed range, which is where out-of-range values would show up; character
# columns only report their length and class.
summary(CPRD)
##      Sex            Routine position   Socioeconomic status Level of education
##  Length:207         Length:207         Length:207           Length:207        
##  Class :character   Class :character   Class :character     Class :character  
##  Mode  :character   Mode  :character   Mode  :character     Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##  Years experience        Age             CE1             CE3       
##  Length:207         Min.   :13.00   Min.   :0.000   Min.   :0.000  
##  Class :character   1st Qu.:15.00   1st Qu.:2.000   1st Qu.:2.000  
##  Mode  :character   Median :16.00   Median :3.000   Median :3.000  
##                     Mean   :16.37   Mean   :2.841   Mean   :2.845  
##                     3rd Qu.:17.00   3rd Qu.:4.000   3rd Qu.:4.000  
##                     Max.   :28.00   Max.   :4.000   Max.   :4.000  
##       CE6             CE8             CE10           CE12            CE13      
##  Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:3.000   1st Qu.:2.00   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :3.000   Median :3.00   Median :2.000   Median :3.000  
##  Mean   :1.937   Mean   :3.087   Mean   :2.43   Mean   :1.923   Mean   :2.309  
##  3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.000  
##       CE14            CE17            CE19            CE20           CE21      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.00   1st Qu.:1.000  
##  Median :3.000   Median :2.000   Median :2.000   Median :3.00   Median :3.000  
##  Mean   :2.647   Mean   :2.092   Mean   :2.208   Mean   :2.44   Mean   :2.411  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.00   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.00   Max.   :4.000  
##       CE24          CE26           CE30            CE32            CE36      
##  Min.   :0.0   Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.0   1st Qu.:2.00   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :3.0   Median :3.00   Median :3.000   Median :3.000   Median :2.000  
##  Mean   :2.7   Mean   :2.57   Mean   :2.517   Mean   :3.246   Mean   :1.768  
##  3rd Qu.:4.0   3rd Qu.:4.00   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :4.0   Max.   :4.00   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##       CE41            CE43            CE54            IER9      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :2.000  
##  Mean   :2.488   Mean   :2.913   Mean   :3.193   Mean   :2.087  
##  3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##      IER16           IER28           IER34           IER35      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :1.000   Median :2.000   Median :3.000   Median :1.000  
##  Mean   :1.633   Mean   :1.903   Mean   :2.483   Mean   :1.536  
##  3rd Qu.:2.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##      IER42           IER44           IER46           IER47      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :2.000  
##  Mean   :3.174   Mean   :2.686   Mean   :2.758   Mean   :1.633  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##      IER51           IER52           IER53             M4             M15      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:3.00  
##  Median :2.000   Median :2.000   Median :3.000   Median :2.000   Median :3.00  
##  Mean   :1.879   Mean   :1.971   Mean   :2.454   Mean   :2.304   Mean   :2.99  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.00  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.00  
##       M29             M31             M33             M39       
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :4.000   Median :4.000  
##  Mean   :3.174   Mean   :2.942   Mean   :3.348   Mean   :3.237  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##       M49             M55            HM2             HM7             HM23      
##  Min.   :0.000   Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:3.00   1st Qu.:1.000   1st Qu.:3.000   1st Qu.:0.000  
##  Median :3.000   Median :4.00   Median :2.000   Median :3.000   Median :1.000  
##  Mean   :3.169   Mean   :3.58   Mean   :1.865   Mean   :3.087   Mean   :1.203  
##  3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.00   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##       HM25           HM37            HM40            HM45            HM48      
##  Min.   :0.00   Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:3.00   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :3.00   Median :2.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :2.99   Mean   :2.401   Mean   :3.072   Mean   :2.816   Mean   :2.604  
##  3rd Qu.:4.00   3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :4.00   Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##       HM50            CHE5           CHE11           CHE18      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:2.000   1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000  
##  Median :3.000   Median :3.000   Median :3.000   Median :4.000  
##  Mean   :2.773   Mean   :3.203   Mean   :2.807   Mean   :3.396  
##  3rd Qu.:3.500   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##      CHE22           CHE27           CHE38      
##  Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.:1.000   1st Qu.:3.000  
##  Median :4.000   Median :2.000   Median :4.000  
##  Mean   :3.536   Mean   :2.005   Mean   :3.396  
##  3rd Qu.:4.000   3rd Qu.:3.000   3rd Qu.:4.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000

## Discarding outliers from the dataset: not applicable (none were removed)

# Number of columns (variables) in CPRD. For a data frame/tibble this is the
# same value length() returns; ncol() makes it explicit that columns, not rows,
# are being counted.
ncol(CPRD)
## [1] 61

# Exploratory Factor Analysis (EFA) ## Select the variables for the EFA

# Names of the 55 questionnaire item columns that enter the EFA, i.e. every
# column of CPRD except the five demographic columns and Age. The names are
# assembled per item prefix (CE, IER, M, HM, CHE), in the same order as the
# columns appear in summary(CPRD).
# NOTE: `data` holds column *names* only, not the responses themselves.
data <- c(
  paste0(
    "CE",
    c(1, 3, 6, 8, 10, 12, 13, 14, 17, 19, 20, 21, 24, 26, 30, 32, 36, 41, 43, 54)
  ),
  paste0("IER", c(9, 16, 28, 34, 35, 42, 44, 46, 47, 51, 52, 53)),
  paste0("M", c(4, 15, 29, 31, 33, 39, 49, 55)),
  paste0("HM", c(2, 7, 23, 25, 37, 40, 45, 48, 50)),
  paste0("CHE", c(5, 11, 18, 22, 27, 38))
)

### Check assumptions: Kaiser-Meyer-Olkin (KMO) measure of sampling adequacy and Bartlett's test of sphericity

## EFA treating the items as ordinal measurements

##Polychoric correlation matrix