IMPORTAR LA BASE DE DATOS

  library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
  # Load the Excel workbook into `datos`.
  # NOTE(review): the path is absolute and machine-specific (D: drive).
  datos <- read_excel(
    path = "D:/Documentos HDD/Cuarto semestre/Estadistica Aplicada/ea9am/Concentracion.xlsx"
  )

TIPIFICACION O ESTANDARIZACION DE VARIABLES

  # Standardize every column of `datos` (center on the column mean, divide by
  # the column standard deviation). scale() returns a matrix, so the result is
  # converted back to a data frame for the column-based calls further down.
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  datost <- as.data.frame(scale(datos, center = TRUE, scale = TRUE))

NORMALIDAD MULTIVARIANTE

H0: Normalidad multivariante. H1: No normalidad multivariante. Confianza = 95 %; Alfa = 5 % = 0,05. Si P value > alfa: no se rechaza la H0 (Normalidad). Si P value < alfa: se rechaza la H0 (No normalidad).

  library(MVN)
## Warning: package 'MVN' was built under R version 4.1.3
  # Multivariate normality check on columns 2 to 7 of the standardized data,
  # using the default settings of mvn().
  mvn(data = datost[, 2:7])
## $multivariateNormality
##            Test       HZ p value MVN
## 1 Henze-Zirkler 4.906423       0  NO
## 
## $univariateNormality
##               Test               Variable Statistic   p value Normality
## 1 Anderson-Darling          SO2              3.0767  <0.001      NO    
## 2 Anderson-Darling          PM10             4.9622  <0.001      NO    
## 3 Anderson-Darling Reactivacion_Comercial    1.9184   1e-04      NO    
## 4 Anderson-Darling Supermercado_Farmacia     2.7693  <0.001      NO    
## 5 Anderson-Darling    Parques_Centros        9.9286  <0.001      NO    
## 6 Anderson-Darling  Estaciones_Transito      2.5745  <0.001      NO    
## 
## $Descriptives
##                          n          Mean Std.Dev      Median       Min      Max
## SO2                    447 -6.189242e-17       1  0.06965129 -2.004845 3.393844
## PM10                   447 -3.051938e-17       1 -0.14037071 -3.579765 3.888564
## Reactivacion_Comercial 447 -6.585740e-17       1  0.01854640 -2.392092 2.252796
## Supermercado_Farmacia  447  3.998838e-17       1 -0.12681933 -2.587563 3.792143
## Parques_Centros        447  7.320669e-18       1 -0.08011009 -2.441994 3.348431
## Estaciones_Transito    447  9.135919e-18       1 -0.05730576 -2.182862 3.376285
##                              25th      75th       Skew   Kurtosis
## SO2                    -0.8867283 0.7900646 0.14664441 -0.7193595
## PM10                   -0.7341725 0.5271334 0.73992750  1.0482736
## Reactivacion_Comercial -0.6870062 0.6065069 0.01598432 -0.2850923
## Supermercado_Farmacia  -0.5825126 0.6022899 0.44341119  0.8788276
## Parques_Centros        -0.5372489 0.4151237 0.97018812  1.9155406
## Estaciones_Transito    -0.6568215 0.5422100 0.34792081 -0.1004907

Como el p value (0) < alfa (0,05), se rechaza la H0; por lo tanto, no existe normalidad multivariante (la prueba de Henze-Zirkler indica MVN = NO).

MATRIZ DE CORRELACIONES

  library(psych)
## Warning: package 'psych' was built under R version 4.1.3
  # Print pairwise correlations and their p values for columns 2 to 7.
  corr.test(x = datost[, 2:7])
## Call:corr.test(x = datost[, 2:7])
## Correlation matrix 
##                         SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2                    1.00 0.61                   0.40                  0.44
## PM10                   0.61 1.00                   0.28                  0.28
## Reactivacion_Comercial 0.40 0.28                   1.00                  0.87
## Supermercado_Farmacia  0.44 0.28                   0.87                  1.00
## Parques_Centros        0.02 0.07                   0.81                  0.66
## Estaciones_Transito    0.40 0.27                   0.94                  0.87
##                        Parques_Centros Estaciones_Transito
## SO2                               0.02                0.40
## PM10                              0.07                0.27
## Reactivacion_Comercial            0.81                0.94
## Supermercado_Farmacia             0.66                0.87
## Parques_Centros                   1.00                0.77
## Estaciones_Transito               0.77                1.00
## Sample Size 
## [1] 447
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                         SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2                    0.00 0.00                      0                     0
## PM10                   0.00 0.00                      0                     0
## Reactivacion_Comercial 0.00 0.00                      0                     0
## Supermercado_Farmacia  0.00 0.00                      0                     0
## Parques_Centros        0.69 0.17                      0                     0
## Estaciones_Transito    0.00 0.00                      0                     0
##                        Parques_Centros Estaciones_Transito
## SO2                               0.69                   0
## PM10                              0.33                   0
## Reactivacion_Comercial            0.00                   0
## Supermercado_Farmacia             0.00                   0
## Parques_Centros                   0.00                   0
## Estaciones_Transito               0.00                   0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option
  # Keep the correlation test result, then display its `r` element
  # (the correlation matrix).
  correlaciones <- corr.test(x = datost[, 2:7])
  correlaciones[["r"]]
##                               SO2       PM10 Reactivacion_Comercial
## SO2                    1.00000000 0.61169793              0.4043615
## PM10                   0.61169793 1.00000000              0.2808423
## Reactivacion_Comercial 0.40436153 0.28084230              1.0000000
## Supermercado_Farmacia  0.44278531 0.28031731              0.8749668
## Parques_Centros        0.01908292 0.06546006              0.8137739
## Estaciones_Transito    0.39928523 0.26992440              0.9444005
##                        Supermercado_Farmacia Parques_Centros
## SO2                                0.4427853      0.01908292
## PM10                               0.2803173      0.06546006
## Reactivacion_Comercial             0.8749668      0.81377389
## Supermercado_Farmacia              1.0000000      0.65541398
## Parques_Centros                    0.6554140      1.00000000
## Estaciones_Transito                0.8721979      0.77033431
##                        Estaciones_Transito
## SO2                              0.3992852
## PM10                             0.2699244
## Reactivacion_Comercial           0.9444005
## Supermercado_Farmacia            0.8721979
## Parques_Centros                  0.7703343
## Estaciones_Transito              1.0000000
  # Correlation matrix as a plain matrix; this is the input to the factor
  # extraction calls that follow.
  r <- as.matrix(correlaciones[["r"]])

Alfa = 0,05. Si P value > alfa: no se rechaza la H0. Si P value < alfa: se rechaza la H0.

DETERMINACION DEL NUMERO DE FACTORES A EXTRAER

Metodo de las componentes principales iteradas

  # Parallel analysis on the correlation matrix with principal-axis factoring;
  # n.obs gives the sample size because `r` is not raw data.
  fa.parallel(r, n.obs = 447, fm = "pa", ylabel = "Eigenvalues")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo de las componentes principales

  # Parallel analysis for principal components.
  # "pc" is not a valid value for `fm` (the factoring method): psych reports
  # "factor method not specified correctly" and falls back to minimum
  # residual. Components are requested through the `fa` argument instead.
  fa.parallel(r, n.obs = 447, fa = "pc", ylabel = "Eigenvalues")
## factor method not specified correctly, minimum residual (unweighted least squares  used
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo de la maxima verosimilitud

  # Parallel analysis on the correlation matrix with maximum-likelihood
  # factoring; n.obs gives the sample size because `r` is not raw data.
  fa.parallel(r, n.obs = 447, fm = "ml", ylabel = "Eigenvalues")

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo paralelo con iteraciones

  library(paran)
## Warning: package 'paran' was built under R version 4.1.3
## Loading required package: MASS
  # Horn's parallel analysis with 1000 random iterations.
  # paran() expects the data matrix as its first argument. Passing the 6x6
  # correlation matrix `r` makes it treat the matrix rows as 6 observations
  # and analyse the correlation of `r` itself, so the standardized variables
  # are supplied instead.
  # NOTE(review): the rendered output below this call was produced from `r`
  # and must be regenerated.
  paran(datost[, 2:7], iterations = 1000, graph = TRUE)
## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 1000 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           3.322950    5.071623      1.748672
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

EXTRACCION DE FACTORES

Metodo de Analisis de los componentes principales (ACP)

  # Unrotated principal components solution with one component, fitted on the
  # correlation matrix. The argument is spelled `nfactors` in full rather than
  # relying on partial matching of `nfactor`.
  acp <- principal(r, nfactors = 1, rotate = "none")
  acp
## Principal Components Analysis
## Call: principal(r = r, nfactors = 1, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         PC1   h2    u2 com
## SO2                    0.53 0.28 0.724   1
## PM10                   0.42 0.18 0.824   1
## Reactivacion_Comercial 0.97 0.94 0.063   1
## Supermercado_Farmacia  0.92 0.85 0.153   1
## Parques_Centros        0.78 0.61 0.387   1
## Estaciones_Transito    0.96 0.91 0.086   1
## 
##                 PC1
## SS loadings    3.76
## Proportion Var 0.63
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 component is sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.17 
## 
## Fit based upon off diagonal values = 0.91

Metodo de los ejes principales o componentes principales iteradas (CPI)

  # Unrotated one-factor solution by principal-axis factoring, fitted on the
  # correlation matrix with the sample size supplied through n.obs.
  cpi <- fa(r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
  # Show the fitted principal-axis solution.
  print(cpi)
## Factor Analysis using method =  pa
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         PA1   h2    u2 com
## SO2                    0.41 0.17 0.828   1
## PM10                   0.32 0.10 0.899   1
## Reactivacion_Comercial 1.00 1.00 0.002   1
## Supermercado_Farmacia  0.90 0.81 0.187   1
## Parques_Centros        0.72 0.52 0.482   1
## Estaciones_Transito    0.97 0.95 0.051   1
## 
##                 PA1
## SS loadings    3.55
## Proportion Var 0.59
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  5.98 with Chi Square of  2649.12
## The degrees of freedom for the model are 9  and the objective function was  1.4 
## 
## The root mean square of the residuals (RMSR) is  0.15 
## The df corrected root mean square of the residuals is  0.2 
## 
## The harmonic number of observations is  447 with the empirical chi square  320.22  with prob <  1.3e-63 
## The total number of observations was  447  with Likelihood Chi Square =  619.68  with prob <  1.2e-127 
## 
## Tucker Lewis Index of factoring reliability =  0.613
## RMSEA index =  0.39  and the 90 % confidence intervals are  0.364 0.416
## BIC =  564.76
## Fit based upon off diagonal values = 0.93

Metodo de maxima verosimilitud (MVE)

  # Unrotated one-factor solution by maximum likelihood, fitted on the
  # correlation matrix with the sample size supplied through n.obs. The
  # argument is spelled `nfactors` in full rather than relying on partial
  # matching of `nfactor`.
  mve <- fa(r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
  mve
## Factor Analysis using method =  ml
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         ML1   h2    u2 com
## SO2                    0.41 0.17 0.835   1
## PM10                   0.28 0.08 0.920   1
## Reactivacion_Comercial 0.99 0.97 0.026   1
## Supermercado_Farmacia  0.89 0.79 0.208   1
## Parques_Centros        0.81 0.66 0.344   1
## Estaciones_Transito    0.96 0.92 0.081   1
## 
##                 ML1
## SS loadings    3.59
## Proportion Var 0.60
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  5.98 with Chi Square of  2649.12
## The degrees of freedom for the model are 9  and the objective function was  0.93 
## 
## The root mean square of the residuals (RMSR) is  0.16 
## The df corrected root mean square of the residuals is  0.21 
## 
## The harmonic number of observations is  447 with the empirical chi square  341.47  with prob <  4.1e-68 
## The total number of observations was  447  with Likelihood Chi Square =  412.96  with prob <  2.3e-83 
## 
## Tucker Lewis Index of factoring reliability =  0.744
## RMSEA index =  0.317  and the 90 % confidence intervals are  0.292 0.344
## BIC =  358.04
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.99
## Multiple R square of scores with factors          0.98
## Minimum correlation of possible factor scores     0.96

REPRESENTACION GRAFICA DE LOS FACTORES EXTRAIDOS

Metodo de analisis de las componentes principales (ACP)

  # Plot the principal components solution, labelling points with the
  # variable names taken from the correlation matrix.
  plot(acp, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))

Metodo de las componentes principales iteradas (CPI)

  # Plot the principal-axis solution, labelling points with the variable
  # names taken from the correlation matrix.
  plot(cpi, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))

Metodo de la maxima verosimilitud (MVE)

  # Plot the maximum-likelihood solution, labelling points with the variable
  # names taken from the correlation matrix.
  plot(mve, labels = rownames(r), cex = 1, ylim = c(-0.8, 0.8))

OBTENCION DE LAS PUNTUACIONES FACTORIALES

Metodo de analisis de las componentes principales (ACP)

  # Refit the one-component solution on the standardized data (not on the
  # correlation matrix) so that per-observation scores can be computed.
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  acp1 <- principal(datost[, 2:7], nfactors = 1, rotate = "none", scores = TRUE)
  # Component scores as a data frame (inspect with acp1$scores).
  puntuacionesfactoriales_acp <- as.data.frame(acp1$scores)

Metodo de las componentes principales iteradas (CPI)

  # Principal-axis factor scores on the standardized data.
  # fm = "pa" must be given explicitly: without it fa() uses its default
  # extraction method, so the scores would not come from the principal-axis
  # solution this section describes.
  cpi1 <- fa(datost[, 2:7], nfactors = 1, n.obs = 447, rotate = "none",
             fm = "pa", scores = "regression")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
  # Factor scores from `cpi1` as a data frame (inspect with cpi1$scores).
  puntfact_cpi <- as.data.frame(cpi1$scores)

Metodo de la maxima verosimilitud

  # Maximum-likelihood factor scores on the standardized data.
  # fm = "ml" must be given explicitly: without it fa() uses its default
  # extraction method, which makes this fit identical to the unparameterised
  # call used for the previous method instead of a maximum-likelihood one.
  mve1 <- fa(datost[, 2:7], nfactors = 1, n.obs = 447, rotate = "none",
             fm = "ml", scores = "regression")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
  # Factor scores from `mve1` as a data frame (inspect with mve1$scores).
  puntfact_mve <- as.data.frame(mve1$scores)

OBTENCION DE LOS FACTORES EXTRAIDOS

Aqui se trabaja con el metodo que el investigador decida (ACP, CPI, MVE).

  # Thurstone scoring for the principal components solution. The input is
  # the correlation matrix rather than raw data, so the printed result has
  # weights but `$scores` is NULL.
  factor.scores(x = r, f = acp, method = "Thurstone")
## $scores
## NULL
## 
## $weights
##                              PC1
## SO2                    0.1395761
## PM10                   0.1115274
## Reactivacion_Comercial 0.2571485
## Supermercado_Farmacia  0.2445143
## Parques_Centros        0.2080484
## Estaciones_Transito    0.2540099
## 
## $r.scores
##     PC1
## PC1   1
## 
## $R2
## [1] 1

AGREGAR FACTOR EXTRAIDO (PUNTUACIONES FACTORIALES) EN EL DATA FRAME ORIGINAL

  # Append the component-score column(s) to the original (unstandardized)
  # data: both objects are concatenated as lists of columns and the combined
  # list is turned back into a data frame.
  datos_puntuaciones <- as.data.frame(c(datos, puntuacionesfactoriales_acp))