library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
# Read the workbook, then centre and scale every column (mean 0, sd 1)
# and keep the standardized values as a data frame.
datos <- read_excel("D:/Documentos HDD/Cuarto semestre/Estadistica Aplicada/ea9am/Concentracion.xlsx")
datost <- as.data.frame(scale(datos, center = TRUE, scale = TRUE))
H0: Normalidad multivariante. H1: No normalidad multivariante. Confianza = 95 %; alfa = 5 % = 0,05. P value > alfa: no se rechaza la H0 (normalidad). P value < alfa: se rechaza la H0 (no normalidad).
library(MVN)
## Warning: package 'MVN' was built under R version 4.1.3
# Multivariate and per-variable normality tests on columns 2 to 7.
mvn(data = datost[, 2:7])
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 4.906423 0 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling SO2 3.0767 <0.001 NO
## 2 Anderson-Darling PM10 4.9622 <0.001 NO
## 3 Anderson-Darling Reactivacion_Comercial 1.9184 1e-04 NO
## 4 Anderson-Darling Supermercado_Farmacia 2.7693 <0.001 NO
## 5 Anderson-Darling Parques_Centros 9.9286 <0.001 NO
## 6 Anderson-Darling Estaciones_Transito 2.5745 <0.001 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max
## SO2 447 -6.189242e-17 1 0.06965129 -2.004845 3.393844
## PM10 447 -3.051938e-17 1 -0.14037071 -3.579765 3.888564
## Reactivacion_Comercial 447 -6.585740e-17 1 0.01854640 -2.392092 2.252796
## Supermercado_Farmacia 447 3.998838e-17 1 -0.12681933 -2.587563 3.792143
## Parques_Centros 447 7.320669e-18 1 -0.08011009 -2.441994 3.348431
## Estaciones_Transito 447 9.135919e-18 1 -0.05730576 -2.182862 3.376285
## 25th 75th Skew Kurtosis
## SO2 -0.8867283 0.7900646 0.14664441 -0.7193595
## PM10 -0.7341725 0.5271334 0.73992750 1.0482736
## Reactivacion_Comercial -0.6870062 0.6065069 0.01598432 -0.2850923
## Supermercado_Farmacia -0.5825126 0.6022899 0.44341119 0.8788276
## Parques_Centros -0.5372489 0.4151237 0.97018812 1.9155406
## Estaciones_Transito -0.6568215 0.5422100 0.34792081 -0.1004907
Como el p value (≈ 0) < alfa (0,05), se rechaza la H0; por lo tanto, no existe normalidad multivariante (la prueba de Henze-Zirkler indica MVN = NO).
library(psych)
## Warning: package 'psych' was built under R version 4.1.3
# Pairwise correlations and their p-values for columns 2 to 7.
corr.test(x = datost[, 2:7])
## Call:corr.test(x = datost[, 2:7])
## Correlation matrix
## SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2 1.00 0.61 0.40 0.44
## PM10 0.61 1.00 0.28 0.28
## Reactivacion_Comercial 0.40 0.28 1.00 0.87
## Supermercado_Farmacia 0.44 0.28 0.87 1.00
## Parques_Centros 0.02 0.07 0.81 0.66
## Estaciones_Transito 0.40 0.27 0.94 0.87
## Parques_Centros Estaciones_Transito
## SO2 0.02 0.40
## PM10 0.07 0.27
## Reactivacion_Comercial 0.81 0.94
## Supermercado_Farmacia 0.66 0.87
## Parques_Centros 1.00 0.77
## Estaciones_Transito 0.77 1.00
## Sample Size
## [1] 447
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2 0.00 0.00 0 0
## PM10 0.00 0.00 0 0
## Reactivacion_Comercial 0.00 0.00 0 0
## Supermercado_Farmacia 0.00 0.00 0 0
## Parques_Centros 0.69 0.17 0 0
## Estaciones_Transito 0.00 0.00 0 0
## Parques_Centros Estaciones_Transito
## SO2 0.69 0
## PM10 0.33 0
## Reactivacion_Comercial 0.00 0
## Supermercado_Farmacia 0.00 0
## Parques_Centros 0.00 0
## Estaciones_Transito 0.00 0
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# Keep the correlation test result, then display its correlation matrix.
correlaciones <- corr.test(x = datost[, 2:7])
correlaciones[["r"]]
## SO2 PM10 Reactivacion_Comercial
## SO2 1.00000000 0.61169793 0.4043615
## PM10 0.61169793 1.00000000 0.2808423
## Reactivacion_Comercial 0.40436153 0.28084230 1.0000000
## Supermercado_Farmacia 0.44278531 0.28031731 0.8749668
## Parques_Centros 0.01908292 0.06546006 0.8137739
## Estaciones_Transito 0.39928523 0.26992440 0.9444005
## Supermercado_Farmacia Parques_Centros
## SO2 0.4427853 0.01908292
## PM10 0.2803173 0.06546006
## Reactivacion_Comercial 0.8749668 0.81377389
## Supermercado_Farmacia 1.0000000 0.65541398
## Parques_Centros 0.6554140 1.00000000
## Estaciones_Transito 0.8721979 0.77033431
## Estaciones_Transito
## SO2 0.3992852
## PM10 0.2699244
## Reactivacion_Comercial 0.9444005
## Supermercado_Farmacia 0.8721979
## Parques_Centros 0.7703343
## Estaciones_Transito 1.0000000
# Correlation matrix as a plain matrix; used by the calls that follow.
r <- as.matrix(correlaciones[["r"]])
Alfa = 0,05. Si P value > alfa: no se rechaza la H0. Si P value < alfa: se rechaza la H0.
# Parallel analysis on the correlation matrix with principal-axis factoring.
fa.parallel(x = r, n.obs = 447, fm = "pa", ylabel = "Eigenvalues")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
# Parallel analysis for principal components. "pc" is not a factoring
# method accepted by `fm` (psych falls back to minres and reports "factor
# method not specified correctly"); components are requested with
# `fa = "pc"` instead.
# NOTE(review): the messages recorded below this call came from the earlier
# `fm = "pc"` run; re-render the document to refresh them.
fa.parallel(r, fa = "pc", n.obs = 447, ylabel = "Eigenvalues")
## factor method not specified correctly, minimum residual (unweighted least squares used
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## factor method not specified correctly, minimum residual (unweighted least squares used
## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
# Parallel analysis on the correlation matrix with maximum-likelihood factoring.
fa.parallel(x = r, n.obs = 447, fm = "ml", ylabel = "Eigenvalues")
## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
library(paran)
## Warning: package 'paran' was built under R version 4.1.3
## Loading required package: MASS
# Horn's parallel analysis. paran() expects the raw observations as `x`;
# given the 6 x 6 correlation matrix it treats it as six observations,
# which is why the recorded first eigenvalue (5.07) disagrees with the PC1
# SS loadings reported by principal() (3.76). Pass the standardized data.
# NOTE(review): the output recorded below, and the single-factor choice
# made afterwards, came from the earlier call; re-render and re-check.
paran(datost[, 2:7], iterations = 1000, graph = TRUE)
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 1000 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 3.322950 5.071623 1.748672
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)
# One unrotated principal component extracted from the correlation matrix.
acp <- principal(r, nfactors = 1, rotate = "none")
print(acp)
## Principal Components Analysis
## Call: principal(r = r, nfactors = 1, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## SO2 0.53 0.28 0.724 1
## PM10 0.42 0.18 0.824 1
## Reactivacion_Comercial 0.97 0.94 0.063 1
## Supermercado_Farmacia 0.92 0.85 0.153 1
## Parques_Centros 0.78 0.61 0.387 1
## Estaciones_Transito 0.96 0.91 0.086 1
##
## PC1
## SS loadings 3.76
## Proportion Var 0.63
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.17
##
## Fit based upon off diagonal values = 0.91
# Single unrotated factor from the correlation matrix, principal-axis method.
cpi <- fa(r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
print(cpi)
## Factor Analysis using method = pa
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 h2 u2 com
## SO2 0.41 0.17 0.828 1
## PM10 0.32 0.10 0.899 1
## Reactivacion_Comercial 1.00 1.00 0.002 1
## Supermercado_Farmacia 0.90 0.81 0.187 1
## Parques_Centros 0.72 0.52 0.482 1
## Estaciones_Transito 0.97 0.95 0.051 1
##
## PA1
## SS loadings 3.55
## Proportion Var 0.59
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 15 and the objective function was 5.98 with Chi Square of 2649.12
## The degrees of freedom for the model are 9 and the objective function was 1.4
##
## The root mean square of the residuals (RMSR) is 0.15
## The df corrected root mean square of the residuals is 0.2
##
## The harmonic number of observations is 447 with the empirical chi square 320.22 with prob < 1.3e-63
## The total number of observations was 447 with Likelihood Chi Square = 619.68 with prob < 1.2e-127
##
## Tucker Lewis Index of factoring reliability = 0.613
## RMSEA index = 0.39 and the 90 % confidence intervals are 0.364 0.416
## BIC = 564.76
## Fit based upon off diagonal values = 0.93
# Single unrotated factor from the correlation matrix, maximum likelihood.
mve <- fa(r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
print(mve)
## Factor Analysis using method = ml
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## SO2 0.41 0.17 0.835 1
## PM10 0.28 0.08 0.920 1
## Reactivacion_Comercial 0.99 0.97 0.026 1
## Supermercado_Farmacia 0.89 0.79 0.208 1
## Parques_Centros 0.81 0.66 0.344 1
## Estaciones_Transito 0.96 0.92 0.081 1
##
## ML1
## SS loadings 3.59
## Proportion Var 0.60
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 15 and the objective function was 5.98 with Chi Square of 2649.12
## The degrees of freedom for the model are 9 and the objective function was 0.93
##
## The root mean square of the residuals (RMSR) is 0.16
## The df corrected root mean square of the residuals is 0.21
##
## The harmonic number of observations is 447 with the empirical chi square 341.47 with prob < 4.1e-68
## The total number of observations was 447 with Likelihood Chi Square = 412.96 with prob < 2.3e-83
##
## Tucker Lewis Index of factoring reliability = 0.744
## RMSEA index = 0.317 and the 90 % confidence intervals are 0.292 0.344
## BIC = 358.04
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.96
# Loading plots for the three one-factor solutions, labelled with the
# variable names taken from the correlation matrix.
plot(acp, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))
plot(cpi, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))
plot(mve, labels = rownames(r), cex = 1, ylim = c(-0.8, 0.8))
# Refit the one-component solution on the standardized data (not on the
# correlation matrix) so that scores are computed, and keep those scores
# as a data frame.
acp1 <- principal(datost[, 2:7], nfactors = 1, rotate = "none", scores = TRUE)
puntuacionesfactoriales_acp <- as.data.frame(acp1$scores)
# Regression factor scores for the principal-axis ("pa") solution.
# The original call omitted `fm`, so fa() used its default (minres) and the
# scores did not correspond to the principal-axis model fitted as `cpi`.
cpi1 <- fa(
  datost[, 2:7],
  nfactors = 1,
  fm = "pa",
  rotate = "none",
  n.obs = 447,
  scores = "regression"
)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
puntfact_cpi <- as.data.frame(cpi1$scores)
# Regression factor scores for the maximum-likelihood ("ml") solution.
# The original call omitted `fm`, so fa() used its default (minres) and the
# scores did not correspond to the maximum-likelihood model fitted as `mve`.
# NOTE(review): the fa.stats warning recorded in the earlier render came
# from that default fit; re-render the document to refresh the output.
mve1 <- fa(
  datost[, 2:7],
  nfactors = 1,
  fm = "ml",
  rotate = "none",
  n.obs = 447,
  scores = "regression"
)
puntfact_mve <- as.data.frame(mve1$scores)
Aquí se trabaja con el método que el investigador decida (ACP, CPI, MVE).
# Thurstone scoring weights for the component solution. The input is the
# correlation matrix rather than raw data, so the recorded result has
# weights but `$scores` is NULL.
factor.scores(x = r, f = acp, method = "Thurstone")
## $scores
## NULL
##
## $weights
## PC1
## SO2 0.1395761
## PM10 0.1115274
## Reactivacion_Comercial 0.2571485
## Supermercado_Farmacia 0.2445143
## Parques_Centros 0.2080484
## Estaciones_Transito 0.2540099
##
## $r.scores
## PC1
## PC1 1
##
## $R2
## [1] 1
# Append the component scores to the original columns: c() joins both
# tables into one list of columns, which is then turned into a data frame.
datos_puntuaciones <- as.data.frame(c(datos, puntuacionesfactoriales_acp))