Homework 2

library(haven) 
library(foreign) 
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(polycor)

## Warning: package 'polycor' was built under R version 3.5.3

library(corrplot)

## Warning: package 'corrplot' was built under R version 3.5.3

## corrplot 0.84 loaded

library(psych)

## Warning: package 'psych' was built under R version 3.5.3

## 
## Attaching package: 'psych'

## The following object is masked from 'package:polycor':
## 
##     polyserial

# Read the SPSS (.sav) student file.
BSGCANM6 <- read_sav("BSGCANM6.sav")

# Ten questionnaire items that enter the factor analysis (order matters for
# the printed tables, so it is kept exactly as before).
item_vars <- c(
  "BSBM19A", "BSBM19B", "BSBM19C", "BSBM19D", "BSBM17A",
  "BSBM20A", "BSBM20B", "BSBM20C", "BSBM20E", "BSBM20D"
)
# Covariates and the outcome that are only needed later for the regression.
covariate_vars <- c("BSBG01", "BSBG07A", "BSBG07B", "BSBG10A", "BSMMAT01")

df <- BSGCANM6[, c(item_vars, covariate_vars)]

# Listwise deletion is done on items AND covariates together, so that `df`
# (items only) and `df1` (items + covariates) keep exactly the same rows and
# factor scores can later be matched to the covariates by position.
df1 <- na.omit(df)
df <- df1[, item_vars]

# No attach(): every later access uses `df$...`, and an attached copy would go
# stale as soon as `df` is reassigned.
summary(df)

##     BSBM19A         BSBM19B         BSBM19C         BSBM19D     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.000   Median :3.000   Median :3.000   Median :2.000  
##  Mean   :1.791   Mean   :2.884   Mean   :2.757   Mean   :2.045  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:3.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##     BSBM17A         BSBM20A         BSBM20B         BSBM20C     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.026   Mean   :1.655   Mean   :1.801   Mean   :1.385  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##     BSBM20E         BSBM20D     
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000  
##  Median :2.000   Median :1.000  
##  Mean   :2.458   Mean   :1.587  
##  3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :4.000   Max.   :4.000

# Strip the haven labels: from here on the items are plain numeric codes.
df <- as.data.frame(lapply(df, as.numeric))

# Plot titles keyed by item name, listed in the order the plots are drawn.
item_titles <- c(
  BSBM19A = "Распределение переменной \n 'Обычно у меня все хорошо получается в математике' ",
  BSBM19B = "Распределение переменной 'Математика для меня более  \n тяжелый предмет, чем для большинства моих одноклассников' ",
  BSBM19C = "Распределение переменной  \n 'Математика - не одна из моих сильных сторон' ",
  BSBM19D = "Распределение переменной  \n 'Я быстро учусь математике' ",
  BSBM17A = "Распределение переменной  \n 'Мне нравится изучать математику' ",
  BSBM20A = "Распределение переменной 'Я думаю, что изучение математики  \n поможет мне в моей повседневной жизни' ",
  BSBM20B = "Распределение переменной  'Мне нужна математика,  \n чтобы изучать другие школьные предметы' ",
  BSBM20C = "Распределение переменной 'Мне нужно хорошо разбираться  \n в математике, чтобы попасть в тот университет, в который хочу' ",
  BSBM20D = "Распределение переменной 'Мне нужно преуспеть в математике,  \n чтобы получить работу, которую я хочу' ",
  BSBM20E = "Распределение переменной 'Я хотел бы работу,  \n которая включает в себя использование математики' "
)

# One histogram per item. The responses are integer codes, so the bins are one
# unit wide and centred on each code; the default breaks would draw thin bars
# sitting to the left of the tick marks.
for (item in names(item_titles)) {
  responses <- df[[item]]
  hist(
    responses,
    breaks = seq(min(responses) - 0.5, max(responses) + 0.5, by = 1),
    main = item_titles[[item]],
    xlab = "Степень согласия",
    ylab = "Частота ответа"
  )
}

# hetcor() picks the correlation type from the column classes: two factors give
# a polychoric correlation. The factor copy lives in its own variable so that
# `df` keeps its numeric codes (as.numeric() on a factor returns level indices,
# not the original values, which makes round-tripping through `df` fragile).
df_ordinal <- as.data.frame(lapply(df, function(x) factor(x, ordered = TRUE)))
df.cor <- hetcor(df_ordinal)

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

## Warning in log(P): созданы NaN

# Print the hetcor result: correlations with their type, standard errors,
# sample size and p-values of the bivariate-normality tests.
df.cor

## 
## Two-Step Estimates
## 
## Correlations/Type of Correlation:
##         BSBM19A    BSBM19B    BSBM19C    BSBM19D    BSBM17A    BSBM20A
## BSBM19A       1 Polychoric Polychoric Polychoric Polychoric Polychoric
## BSBM19B -0.7295          1 Polychoric Polychoric Polychoric Polychoric
## BSBM19C -0.7723     0.8176          1 Polychoric Polychoric Polychoric
## BSBM19D  0.7936    -0.6789    -0.7247          1 Polychoric Polychoric
## BSBM17A  0.6374    -0.5014    -0.6111     0.6473          1 Polychoric
## BSBM20A  0.3275    -0.2075    -0.2639     0.3301     0.4843          1
## BSBM20B  0.2808    -0.1683    -0.2077     0.2765     0.4088     0.6929
## BSBM20C  0.2775    -0.2003    -0.2155     0.2881     0.3397     0.5278
## BSBM20E  0.4744    -0.3853    -0.4855     0.5067     0.5941     0.5331
## BSBM20D  0.2635    -0.1749    -0.2336     0.2761      0.356     0.5046
##            BSBM20B    BSBM20C    BSBM20E    BSBM20D
## BSBM19A Polychoric Polychoric Polychoric Polychoric
## BSBM19B Polychoric Polychoric Polychoric Polychoric
## BSBM19C Polychoric Polychoric Polychoric Polychoric
## BSBM19D Polychoric Polychoric Polychoric Polychoric
## BSBM17A Polychoric Polychoric Polychoric Polychoric
## BSBM20A Polychoric Polychoric Polychoric Polychoric
## BSBM20B          1 Polychoric Polychoric Polychoric
## BSBM20C     0.5358          1 Polychoric Polychoric
## BSBM20E     0.4583     0.4853          1 Polychoric
## BSBM20D     0.4899     0.7664     0.6613          1
## 
## Standard Errors:
##          BSBM19A  BSBM19B  BSBM19C  BSBM19D  BSBM17A  BSBM20A BSBM20B
## BSBM19A                                                              
## BSBM19B 0.006166                                                     
## BSBM19C 0.005388 0.004302                                            
## BSBM19D 0.005086 0.006844 0.006063                                   
## BSBM17A 0.007927 0.009666 0.008067 0.007481                          
## BSBM20A  0.01259  0.01321  0.01286  0.01223  0.01053                 
## BSBM20B  0.01265  0.01309  0.01289  0.01233   0.0111 0.007204        
## BSBM20C  0.01433  0.01462  0.01454  0.01391   0.0135  0.01135 0.01099
## BSBM20E   0.0102   0.0109 0.009761 0.009422 0.008205 0.009769 0.01035
## BSBM20D  0.01345  0.01374  0.01341  0.01303  0.01233  0.01089  0.0108
##          BSBM20C BSBM20E
## BSBM19A                 
## BSBM19B                 
## BSBM19C                 
## BSBM19D                 
## BSBM17A                 
## BSBM20A                 
## BSBM20B                 
## BSBM20C                 
## BSBM20E  0.01175        
## BSBM20D 0.006636 0.00807
## 
## n = 8068 
## 
## P-values for Tests of Bivariate Normality:
##            BSBM19A    BSBM19B   BSBM19C   BSBM19D   BSBM17A   BSBM20A
## BSBM19A                                                              
## BSBM19B 4.625e-118                                                   
## BSBM19C 1.828e-149  8.95e-122                                        
## BSBM19D  8.278e-46 5.831e-111 1.05e-165                              
## BSBM17A  2.463e-24  1.245e-29 1.504e-55 2.635e-36                    
## BSBM20A  1.578e-06  2.823e-12 8.815e-10 3.362e-10 5.454e-13          
## BSBM20B  1.098e-12  6.915e-18  3.42e-12 6.989e-16 3.241e-15 2.958e-61
## BSBM20C  4.886e-15  5.963e-26 1.662e-22 2.989e-24 4.626e-24 1.075e-51
## BSBM20E   4.86e-17  1.806e-25 1.489e-26 1.236e-26 7.295e-33 3.059e-19
## BSBM20D  1.426e-10    1.4e-15 4.868e-11  4.75e-18 5.513e-14 1.378e-39
##           BSBM20B    BSBM20C  BSBM20E
## BSBM19A                              
## BSBM19B                              
## BSBM19C                              
## BSBM19D                              
## BSBM17A                              
## BSBM20A                              
## BSBM20B                              
## BSBM20C 7.407e-58                    
## BSBM20E 2.092e-18  5.788e-31         
## BSBM20D 2.479e-41 9.628e-102 1.53e-58

# Heat map of the polychoric correlations. The matrix is passed explicitly,
# like in every other call below, instead of the whole hetcor object.
cor.plot(df.cor$correlations)

# Make sure the items are numeric codes. Going through as.character() returns
# the actual response values whether `df` currently holds numbers or factors.
df <- as.data.frame(lapply(df, function(x) as.numeric(as.character(x))))

# Parallel analysis on the polychoric matrix. n.obs has to be the number of
# respondents the matrix was estimated from, so it is taken from the data
# (the previously hard-coded 8191 did not match the analysed sample).
fa.parallel(df.cor$correlations, n.obs = nrow(df), fa = "both", n.iter = 100)

## Parallel analysis suggests that the number of factors =  4  and the number of components =  2

# Unrotated three-factor maximum-likelihood solution on the polychoric matrix.
# n.obs is supplied so that psych can report sample-size based fit statistics;
# it does not affect the loadings.
fa1 <- fa(
  df.cor$correlations,
  nfactors = 3, n.obs = nrow(df), rotate = "none", fm = "ml"
)
# Draw the diagram from the fitted object instead of fitting the model again.
fa.diagram(fa1)

fa1

## Factor Analysis using method =  ml
## Call: fa(r = df.cor$correlations, nfactors = 3, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##           ML2   ML1   ML3   h2    u2 com
## BSBM19A  0.83  0.29 -0.04 0.77 0.225 1.2
## BSBM19B -0.81 -0.20  0.18 0.73 0.266 1.2
## BSBM19C -0.85 -0.26  0.15 0.82 0.180 1.2
## BSBM19D  0.79  0.30 -0.02 0.71 0.285 1.3
## BSBM17A  0.64  0.38  0.20 0.59 0.410 1.9
## BSBM20A  0.26  0.52  0.64 0.75 0.255 2.3
## BSBM20B  0.19  0.50  0.59 0.63 0.365 2.2
## BSBM20C  0.06  0.77  0.17 0.63 0.370 1.1
## BSBM20E  0.37  0.68  0.12 0.61 0.391 1.6
## BSBM20D -0.03  1.00 -0.01 1.00 0.005 1.0
## 
##                        ML2  ML1  ML3
## SS loadings           3.36 2.99 0.89
## Proportion Var        0.34 0.30 0.09
## Cumulative Var        0.34 0.64 0.72
## Proportion Explained  0.46 0.41 0.12
## Cumulative Proportion 0.46 0.88 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  7.15
## The degrees of freedom for the model are 18  and the objective function was  0.28 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML2  ML1  ML3
## Correlation of (regression) scores with factors   0.97 1.00 0.87
## Multiple R square of scores with factors          0.93 1.00 0.75
## Minimum correlation of possible factor scores     0.86 0.99 0.51

# Unrotated two-factor maximum-likelihood solution, for comparison with the
# three-factor one. n.obs only adds sample-size based fit statistics.
fa12 <- fa(
  df.cor$correlations,
  nfactors = 2, n.obs = nrow(df), rotate = "none", fm = "ml"
)
# Reuse the fitted object for the diagram rather than refitting.
fa.diagram(fa12)

fa12

## Factor Analysis using method =  ml
## Call: fa(r = df.cor$correlations, nfactors = 2, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##           ML1   ML2   h2   u2 com
## BSBM19A  0.85 -0.24 0.78 0.22 1.2
## BSBM19B -0.78  0.35 0.72 0.28 1.4
## BSBM19C -0.84  0.32 0.81 0.19 1.3
## BSBM19D  0.82 -0.19 0.72 0.28 1.1
## BSBM17A  0.75  0.04 0.56 0.44 1.0
## BSBM20A  0.51  0.47 0.48 0.52 2.0
## BSBM20B  0.45  0.49 0.45 0.55 2.0
## BSBM20C  0.50  0.64 0.66 0.34 1.9
## BSBM20E  0.69  0.36 0.61 0.39 1.5
## BSBM20D  0.51  0.69 0.74 0.26 1.9
## 
##                        ML1  ML2
## SS loadings           4.73 1.80
## Proportion Var        0.47 0.18
## Cumulative Var        0.47 0.65
## Proportion Explained  0.72 0.28
## Cumulative Proportion 0.72 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  7.15
## The degrees of freedom for the model are 26  and the objective function was  0.76 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.97 0.92
## Multiple R square of scores with factors          0.94 0.85
## Minimum correlation of possible factor scores     0.89 0.70

# Unrotated four-factor solution (the number of factors suggested by the
# parallel analysis). Stored under its own name, following the fa12 naming,
# so it no longer overwrites the three-factor model kept in `fa1`.
fa14 <- fa(
  df.cor$correlations,
  nfactors = 4, n.obs = nrow(df), rotate = "none", fm = "ml"
)
fa.diagram(fa14)

# Three-factor solution with an orthogonal (varimax) rotation.
fa2 <- fa(
  df.cor$correlations,
  nfactors = 3, n.obs = nrow(df), rotate = "varimax", fm = "ml"
)
# Diagram drawn from the fitted object; no second fit needed.
fa.diagram(fa2)

fa2

## Factor Analysis using method =  ml
## Call: fa(r = df.cor$correlations, nfactors = 3, rotate = "varimax", 
##     fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##           ML2   ML1   ML3   h2    u2 com
## BSBM19A  0.85  0.13  0.18 0.77 0.225 1.1
## BSBM19B -0.85 -0.08 -0.03 0.73 0.266 1.0
## BSBM19C -0.89 -0.13 -0.07 0.82 0.180 1.1
## BSBM19D  0.81  0.15  0.20 0.71 0.285 1.2
## BSBM17A  0.62  0.20  0.40 0.59 0.410 2.0
## BSBM20A  0.18  0.31  0.79 0.75 0.255 1.4
## BSBM20B  0.12  0.32  0.72 0.63 0.365 1.4
## BSBM20C  0.12  0.69  0.37 0.63 0.370 1.6
## BSBM20E  0.42  0.55  0.35 0.61 0.391 2.6
## BSBM20D  0.11  0.96  0.24 1.00 0.005 1.2
## 
##                        ML2  ML1  ML3
## SS loadings           3.54 2.00 1.70
## Proportion Var        0.35 0.20 0.17
## Cumulative Var        0.35 0.55 0.72
## Proportion Explained  0.49 0.28 0.23
## Cumulative Proportion 0.49 0.77 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  7.15
## The degrees of freedom for the model are 18  and the objective function was  0.28 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML2  ML1  ML3
## Correlation of (regression) scores with factors   0.96 0.99 0.88
## Multiple R square of scores with factors          0.92 0.98 0.77
## Minimum correlation of possible factor scores     0.85 0.96 0.55

# Three-factor ML solution with an oblique (oblimin) rotation on the polychoric
# matrix. psych reports a loading above 1 in absolute value for this fit.
# NOTE: `fa3` is reassigned further down by the fit on the raw item data.
fa3 = fa(df.cor$correlations, nfactors=3, rotate="oblimin", fm="ml")

## Loading required namespace: GPArotation

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.

# Diagram of the oblimin solution; `fa3` already holds exactly this fit, so it
# is reused instead of estimating the same model a second time.
fa.diagram(fa3)

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.

# Final three-factor model, fitted on the raw item responses (not on the
# precomputed matrix) so that the result carries factor scores in fa3$scores,
# which the regression below uses. This replaces the oblimin fit stored in
# `fa3` above. fm is spelled "ml" as in the other calls; psych reports the
# method as "ml" for the former "mle" spelling as well.
fa3 <- fa(df, nfactors = 3, cor = "mixed", fm = "ml")

## 
## mixed.cor is deprecated, please use mixedCor.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.

# Print the final three-factor solution: loadings, factor correlations and
# fit statistics.
fa3

## Factor Analysis using method =  ml
## Call: fa(r = df, nfactors = 3, fm = "mle", cor = "mixed")
## 
##  Warning: A Heywood case was detected. 
## Standardized loadings (pattern matrix) based upon correlation matrix
##           ML2   ML1   ML3   h2    u2 com
## BSBM19A  0.86 -0.01  0.06 0.77 0.225 1.0
## BSBM19B -0.89  0.01  0.11 0.73 0.266 1.0
## BSBM19C -0.92 -0.02  0.07 0.82 0.180 1.0
## BSBM19D  0.81  0.00  0.09 0.71 0.285 1.0
## BSBM17A  0.57  0.00  0.34 0.59 0.410 1.6
## BSBM20A  0.02  0.02  0.85 0.75 0.255 1.0
## BSBM20B -0.03  0.06  0.77 0.63 0.365 1.0
## BSBM20C  0.00  0.65  0.21 0.63 0.370 1.2
## BSBM20E  0.34  0.46  0.19 0.61 0.391 2.2
## BSBM20D -0.02  1.03 -0.05 1.00 0.005 1.0
## 
##                        ML2  ML1  ML3
## SS loadings           3.60 1.89 1.76
## Proportion Var        0.36 0.19 0.18
## Cumulative Var        0.36 0.55 0.72
## Proportion Explained  0.50 0.26 0.24
## Cumulative Proportion 0.50 0.76 1.00
## 
##  With factor correlations of 
##      ML2 ML1  ML3
## ML2 1.00 0.3 0.37
## ML1 0.30 1.0 0.60
## ML3 0.37 0.6 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 3 factors are sufficient.
## 
## The degrees of freedom for the null model are  45  and the objective function was  7.15 with Chi Square of  57637.49
## The degrees of freedom for the model are 18  and the objective function was  0.28 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  8068 with the empirical chi square  487.04  with prob <  5.5e-92 
## The total number of observations was  8068  with Likelihood Chi Square =  2253.06  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.903
## RMSEA index =  0.124  and the 90 % confidence intervals are  0.12 0.128
## BIC =  2091.14
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML2  ML1  ML3
## Correlation of (regression) scores with factors   0.97 1.00 0.92
## Multiple R square of scores with factors          0.94 1.00 0.85
## Minimum correlation of possible factor scores     0.87 0.99 0.70

# Diagram of the final model. Reusing `fa3` avoids re-estimating the
# correlations and the factor model just to draw the picture.
fa.diagram(fa3)

## 
## mixed.cor is deprecated, please use mixedCor.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate =
## rotate, : A loading greater than abs(1) was detected. Examine the loadings
## carefully.

# Split the items into three sets, one per factor, and keep each set as a
# data frame so that its reliability can be checked separately.
f1 <- df[, c("BSBM20D", "BSBM20C", "BSBM20E"), drop = FALSE]
f2 <- df[, c("BSBM19C", "BSBM19B", "BSBM19A", "BSBM19D", "BSBM17A"), drop = FALSE]
f3 <- df[, c("BSBM20A", "BSBM20B"), drop = FALSE]

# Cronbach's alpha for the first item set; check.keys = TRUE lets psych
# reverse items that correlate negatively with the total score.
alpha(x = f1, check.keys = TRUE)

## 
## Reliability analysis   
## Call: alpha(x = f1, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.73      0.75     0.7       0.5   3 0.0051  1.8 0.67     0.53
## 
##  lower alpha upper     95% confidence boundaries
## 0.72 0.73 0.74 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BSBM20D      0.49      0.52    0.36      0.36 1.1   0.0102    NA  0.36
## BSBM20C      0.68      0.69    0.53      0.53 2.2   0.0069    NA  0.53
## BSBM20E      0.76      0.77    0.63      0.63 3.4   0.0051    NA  0.63
## 
##  Item statistics 
##            n raw.r std.r r.cor r.drop mean   sd
## BSBM20D 8068  0.86  0.88  0.81   0.68  1.6 0.80
## BSBM20C 8068  0.75  0.81  0.67   0.54  1.4 0.65
## BSBM20E 8068  0.83  0.77  0.57   0.50  2.5 1.01
## 
## Non missing response frequency for each item
##            1    2    3    4 miss
## BSBM20D 0.58 0.29 0.10 0.03    0
## BSBM20C 0.69 0.25 0.04 0.02    0
## BSBM20E 0.20 0.32 0.29 0.18    0

# Cronbach's alpha for the second item set. With check.keys = TRUE psych
# reverses negatively keyed items automatically; in the output below the
# reversed items (BSBM19C, BSBM19B) are marked with a trailing minus sign.
alpha(f2, check.keys = TRUE)

## Warning in alpha(f2, check.keys = TRUE): Some items were negatively correlated with total scale and were automatically reversed.
##  This is indicated by a negative sign for the variable name.

## 
## Reliability analysis   
## Call: alpha(x = f2, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.88      0.88    0.87       0.6 7.6 0.0021    2 0.8     0.61
## 
##  lower alpha upper     95% confidence boundaries
## 0.88 0.88 0.89 
## 
##  Reliability if an item is dropped:
##          raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## BSBM19C-      0.84      0.85    0.81      0.58 5.5   0.0029 0.0086  0.58
## BSBM19B-      0.86      0.86    0.83      0.61 6.2   0.0026 0.0053  0.60
## BSBM19A       0.85      0.85    0.82      0.58 5.5   0.0027 0.0113  0.58
## BSBM19D       0.85      0.85    0.83      0.59 5.7   0.0027 0.0128  0.59
## BSBM17A       0.88      0.89    0.86      0.66 7.8   0.0021 0.0027  0.65
## 
##  Item statistics 
##             n raw.r std.r r.cor r.drop mean   sd
## BSBM19C- 8068  0.88  0.86  0.83   0.78  2.2 1.11
## BSBM19B- 8068  0.83  0.82  0.77   0.72  2.1 1.03
## BSBM19A  8068  0.85  0.86  0.82   0.77  1.8 0.86
## BSBM19D  8068  0.84  0.85  0.80   0.75  2.0 0.93
## BSBM17A  8068  0.73  0.74  0.63   0.59  2.0 0.92
## 
## Non missing response frequency for each item
##            1    2    3    4 miss
## BSBM19C 0.19 0.21 0.27 0.34    0
## BSBM19B 0.13 0.20 0.32 0.35    0
## BSBM19A 0.45 0.37 0.14 0.05    0
## BSBM19D 0.33 0.37 0.22 0.08    0
## BSBM17A 0.32 0.42 0.16 0.09    0

# Cronbach's alpha for the third item set. This set has only two items, so the
# "reliability if an item is dropped" table is not meaningful here: psych
# emits a matrix-size warning and fills parts of that table with NA.
alpha(f3, check.keys = TRUE)

## Warning in matrix(unlist(drop.item), ncol = 10, byrow = TRUE): длина данных
## [16] не является множителем количества столбцов [10]

## 
## Reliability analysis   
## Call: alpha(x = f3, check.keys = TRUE)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.74      0.74    0.58      0.58 2.8 0.0058  1.7 0.73     0.58
## 
##  lower alpha upper     95% confidence boundaries
## 0.73 0.74 0.75 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BSBM20A      0.58      0.58    0.34      0.58  NA       NA  0.58  0.58
## BSBM20B      0.34      0.58      NA        NA  NA       NA  0.34  0.58
## 
##  Item statistics 
##            n raw.r std.r r.cor r.drop mean   sd
## BSBM20A 8068  0.89  0.89  0.68   0.58  1.7 0.80
## BSBM20B 8068  0.89  0.89  0.68   0.58  1.8 0.83
## 
## Non missing response frequency for each item
##            1    2    3    4 miss
## BSBM20A 0.52 0.35 0.09 0.04    0
## BSBM20B 0.42 0.39 0.14 0.04    0

# Factor scores of the final model, combined with the three item sets.
a <- fa3$scores
a <- cbind(a, f1, f2, f3)

# Scores and covariates are matched by row position, so both tables must
# describe the same respondents.
stopifnot(nrow(a) == nrow(df1))

# Assemble one plain-numeric data frame for the regression. as.numeric() drops
# the haven value labels, which otherwise leak into the fitted model (the
# residuals were printed as a labelled vector).
model_data <- data.frame(
  BSMMAT01 = as.numeric(df1$BSMMAT01),
  BSBG01 = as.numeric(df1$BSBG01),
  BSBG07A = as.numeric(df1$BSBG07A),
  BSBG07B = as.numeric(df1$BSBG07B),
  BSBG10A = as.numeric(df1$BSBG10A),
  ML1 = a$ML1,
  ML2 = a$ML2,
  ML3 = a$ML3
)

# Same predictors as before, but referenced through `data =` so that the
# coefficient names are clean and the model can be used with predict().
model <- lm(
  BSMMAT01 ~ BSBG01 + BSBG07A + BSBG07B + BSBG10A + ML1 + ML2 + ML3,
  data = model_data
)
summary(model)

## 
## Call:
## lm(formula = df1$BSMMAT01 ~ df1$BSBG01 + df1$BSBG07A + df1$BSBG07B + 
##     df1$BSBG10A + a$ML1 + a$ML2 + a$ML3)
## 
## Residuals:
## <Labelled double>
##      Min       1Q   Median       3Q      Max 
## -220.152  -36.339    2.069   38.425  189.504 
## 
## Labels:
##  value              label
##    999 Omitted or invalid
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 539.85098    3.56229 151.546  < 2e-16 ***
## df1$BSBG01   -1.55366    1.27410  -1.219   0.2227    
## df1$BSBG07A   0.52428    0.41886   1.252   0.2107    
## df1$BSBG07B   0.01009    0.40237   0.025   0.9800    
## df1$BSBG10A  -3.69020    1.85520  -1.989   0.0467 *  
## a$ML1         0.20318    0.89106   0.228   0.8196    
## a$ML2       -39.23760    0.70321 -55.798  < 2e-16 ***
## a$ML3         5.12616    0.98761   5.190 2.15e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 56.77 on 8060 degrees of freedom
## Multiple R-squared:  0.3065, Adjusted R-squared:  0.3059 
## F-statistic: 508.9 on 7 and 8060 DF,  p-value: < 2.2e-16

Homework 2

Grishunina L., Kozlova D., Konovalova M., Pikunova S.