IMPORTAR LA BASE DE DATOS

  library(readxl)

## Warning: package 'readxl' was built under R version 4.1.3

  # Read the concentration workbook into `datos`.
  # NOTE(review): absolute, machine-specific path; the script only runs where
  # this exact file location exists.
  datos <- read_excel(
    path = "D:/Documentos HDD/Cuarto semestre/Estadistica Aplicada/ea9am/Concentracion.xlsx"
  )

TIPIFICACION O ESTANDARIZACION DE VARIABLES

  # Standardise every column of `datos` (centre and scale), then convert the
  # matrix returned by scale() back into a data frame.
  datost <- as.data.frame(scale(datos, center = TRUE, scale = TRUE))

NORMALIDAD MULTIVARIANTE

H0: Normalidad multivariante. H1: No normalidad multivariante. Confianza = 95%. Alfa = 5% = 0,05. Si P value > alfa: no se rechaza la H0 (Normalidad). Si P value < alfa: se rechaza la H0 (No normalidad).

  library(MVN)

## Warning: package 'MVN' was built under R version 4.1.3

  # Multivariate and per-variable normality tests on columns 2 to 7 of the
  # standardised data.
  mvn(data = datost[, 2:7])

## $multivariateNormality
##            Test       HZ p value MVN
## 1 Henze-Zirkler 4.906423       0  NO
## 
## $univariateNormality
##               Test               Variable Statistic   p value Normality
## 1 Anderson-Darling          SO2              3.0767  <0.001      NO    
## 2 Anderson-Darling          PM10             4.9622  <0.001      NO    
## 3 Anderson-Darling Reactivacion_Comercial    1.9184   1e-04      NO    
## 4 Anderson-Darling Supermercado_Farmacia     2.7693  <0.001      NO    
## 5 Anderson-Darling    Parques_Centros        9.9286  <0.001      NO    
## 6 Anderson-Darling  Estaciones_Transito      2.5745  <0.001      NO    
## 
## $Descriptives
##                          n          Mean Std.Dev      Median       Min      Max
## SO2                    447 -6.189242e-17       1  0.06965129 -2.004845 3.393844
## PM10                   447 -3.051938e-17       1 -0.14037071 -3.579765 3.888564
## Reactivacion_Comercial 447 -6.585740e-17       1  0.01854640 -2.392092 2.252796
## Supermercado_Farmacia  447  3.998838e-17       1 -0.12681933 -2.587563 3.792143
## Parques_Centros        447  7.320669e-18       1 -0.08011009 -2.441994 3.348431
## Estaciones_Transito    447  9.135919e-18       1 -0.05730576 -2.182862 3.376285
##                              25th      75th       Skew   Kurtosis
## SO2                    -0.8867283 0.7900646 0.14664441 -0.7193595
## PM10                   -0.7341725 0.5271334 0.73992750  1.0482736
## Reactivacion_Comercial -0.6870062 0.6065069 0.01598432 -0.2850923
## Supermercado_Farmacia  -0.5825126 0.6022899 0.44341119  0.8788276
## Parques_Centros        -0.5372489 0.4151237 0.97018812  1.9155406
## Estaciones_Transito    -0.6568215 0.5422100 0.34792081 -0.1004907

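Como el p value (aproximadamente 0) < alfa (0,05), se rechaza la H0; por lo tanto, no existe normalidad multivariante (la prueba de Henze-Zirkler indica MVN = NO y ninguna variable supera la prueba univariante de Anderson-Darling).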

MATRIZ DE CORRELACIONES

  library(psych)

## Warning: package 'psych' was built under R version 4.1.3

  # Pairwise correlations, sample size and p-values for columns 2 to 7.
  corr.test(x = datost[, 2:7])

## Call:corr.test(x = datost[, 2:7])
## Correlation matrix 
##                         SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2                    1.00 0.61                   0.40                  0.44
## PM10                   0.61 1.00                   0.28                  0.28
## Reactivacion_Comercial 0.40 0.28                   1.00                  0.87
## Supermercado_Farmacia  0.44 0.28                   0.87                  1.00
## Parques_Centros        0.02 0.07                   0.81                  0.66
## Estaciones_Transito    0.40 0.27                   0.94                  0.87
##                        Parques_Centros Estaciones_Transito
## SO2                               0.02                0.40
## PM10                              0.07                0.27
## Reactivacion_Comercial            0.81                0.94
## Supermercado_Farmacia             0.66                0.87
## Parques_Centros                   1.00                0.77
## Estaciones_Transito               0.77                1.00
## Sample Size 
## [1] 447
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                         SO2 PM10 Reactivacion_Comercial Supermercado_Farmacia
## SO2                    0.00 0.00                      0                     0
## PM10                   0.00 0.00                      0                     0
## Reactivacion_Comercial 0.00 0.00                      0                     0
## Supermercado_Farmacia  0.00 0.00                      0                     0
## Parques_Centros        0.69 0.17                      0                     0
## Estaciones_Transito    0.00 0.00                      0                     0
##                        Parques_Centros Estaciones_Transito
## SO2                               0.69                   0
## PM10                              0.33                   0
## Reactivacion_Comercial            0.00                   0
## Supermercado_Farmacia             0.00                   0
## Parques_Centros                   0.00                   0
## Estaciones_Transito               0.00                   0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

  # Keep the corr.test() result so its components can be reused below.
  correlaciones <- corr.test(x = datost[, 2:7])
  # Print the correlation matrix component at full precision.
  correlaciones[["r"]]

##                               SO2       PM10 Reactivacion_Comercial
## SO2                    1.00000000 0.61169793              0.4043615
## PM10                   0.61169793 1.00000000              0.2808423
## Reactivacion_Comercial 0.40436153 0.28084230              1.0000000
## Supermercado_Farmacia  0.44278531 0.28031731              0.8749668
## Parques_Centros        0.01908292 0.06546006              0.8137739
## Estaciones_Transito    0.39928523 0.26992440              0.9444005
##                        Supermercado_Farmacia Parques_Centros
## SO2                                0.4427853      0.01908292
## PM10                               0.2803173      0.06546006
## Reactivacion_Comercial             0.8749668      0.81377389
## Supermercado_Farmacia              1.0000000      0.65541398
## Parques_Centros                    0.6554140      1.00000000
## Estaciones_Transito                0.8721979      0.77033431
##                        Estaciones_Transito
## SO2                              0.3992852
## PM10                             0.2699244
## Reactivacion_Comercial           0.9444005
## Supermercado_Farmacia            0.8721979
## Parques_Centros                  0.7703343
## Estaciones_Transito              1.0000000

  # Correlation matrix as a plain matrix; it is the input to the factor
  # extraction calls further down.
  r <- as.matrix(correlaciones[["r"]])

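H0: la correlacion entre las dos variables es igual a cero. H1: la correlacion es distinta de cero. Alfa = 0,05. Si P value > alfa: no se rechaza la H0. Si P value < alfa: se rechaza la H0.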

DETERMINACION DEL NUMERO DE FACTORES A EXTRAER

Metodo de las componentes principales iteradas

  # Parallel analysis on the correlation matrix using principal-axis factoring.
  fa.parallel(x = r, n.obs = 447, fm = "pa", ylabel = "Eigenvalues")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo de las componentes principales

  # Parallel analysis for principal components.
  # "pc" is not a factoring method for `fm`; psych reported "factor method not
  # specified correctly" and silently fell back to minimum residual. The
  # components-only analysis is selected with `fa = "pc"`.
  fa.parallel(r, n.obs = 447, fa = "pc", ylabel = "Eigenvalues")

## factor method not specified correctly, minimum residual (unweighted least squares  used

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used
## factor method not specified correctly, minimum residual (unweighted least squares  used

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo de la maxima verosimilitud

  # Parallel analysis on the correlation matrix using maximum likelihood.
  fa.parallel(x = r, n.obs = 447, fm = "ml", ylabel = "Eigenvalues")

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Metodo paralelo con iteraciones

  library(paran)

## Warning: package 'paran' was built under R version 4.1.3

## Loading required package: MASS

  # Horn's parallel analysis with 1000 random iterations.
  # paran() takes the raw data as its first argument. Passing the 6x6
  # correlation matrix `r` made it analyse `r` as if it were 6 observations,
  # so the eigenvalues and the retained-component count were not those of the
  # data. Pass the standardised variables instead.
  # NOTE(review): re-check the number of factors to extract after re-running.
  paran(datost[, 2:7], iterations = 1000, graph = TRUE)

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 1000 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           3.322950    5.071623      1.748672
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

EXTRACCION DE FACTORES

Metodo de Analisis de los componentes principales (ACP)

  # One unrotated principal component extracted from the correlation matrix.
  acp <- principal(r = r, nfactors = 1, rotate = "none")
  # Show loadings, communalities and fit.
  acp

## Principal Components Analysis
## Call: principal(r = r, nfactors = 1, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         PC1   h2    u2 com
## SO2                    0.53 0.28 0.724   1
## PM10                   0.42 0.18 0.824   1
## Reactivacion_Comercial 0.97 0.94 0.063   1
## Supermercado_Farmacia  0.92 0.85 0.153   1
## Parques_Centros        0.78 0.61 0.387   1
## Estaciones_Transito    0.96 0.91 0.086   1
## 
##                 PC1
## SS loadings    3.76
## Proportion Var 0.63
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 component is sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.17 
## 
## Fit based upon off diagonal values = 0.91

Metodo de los ejes principales o componentes principales iteradas (CPI)

  # One unrotated factor by principal-axis factoring on the correlation matrix.
  cpi <- fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

# Show the principal-axis solution.
print(cpi)

## Factor Analysis using method =  pa
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         PA1   h2    u2 com
## SO2                    0.41 0.17 0.828   1
## PM10                   0.32 0.10 0.899   1
## Reactivacion_Comercial 1.00 1.00 0.002   1
## Supermercado_Farmacia  0.90 0.81 0.187   1
## Parques_Centros        0.72 0.52 0.482   1
## Estaciones_Transito    0.97 0.95 0.051   1
## 
##                 PA1
## SS loadings    3.55
## Proportion Var 0.59
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  5.98 with Chi Square of  2649.12
## The degrees of freedom for the model are 9  and the objective function was  1.4 
## 
## The root mean square of the residuals (RMSR) is  0.15 
## The df corrected root mean square of the residuals is  0.2 
## 
## The harmonic number of observations is  447 with the empirical chi square  320.22  with prob <  1.3e-63 
## The total number of observations was  447  with Likelihood Chi Square =  619.68  with prob <  1.2e-127 
## 
## Tucker Lewis Index of factoring reliability =  0.613
## RMSEA index =  0.39  and the 90 % confidence intervals are  0.364 0.416
## BIC =  564.76
## Fit based upon off diagonal values = 0.93

Metodo de maxima verosimilitud (MVE)

  # One unrotated factor by maximum likelihood on the correlation matrix.
  mve <- fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
  # Show loadings, communalities and fit statistics.
  mve

## Factor Analysis using method =  ml
## Call: fa(r = r, nfactors = 1, n.obs = 447, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                         ML1   h2    u2 com
## SO2                    0.41 0.17 0.835   1
## PM10                   0.28 0.08 0.920   1
## Reactivacion_Comercial 0.99 0.97 0.026   1
## Supermercado_Farmacia  0.89 0.79 0.208   1
## Parques_Centros        0.81 0.66 0.344   1
## Estaciones_Transito    0.96 0.92 0.081   1
## 
##                 ML1
## SS loadings    3.59
## Proportion Var 0.60
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  15  and the objective function was  5.98 with Chi Square of  2649.12
## The degrees of freedom for the model are 9  and the objective function was  0.93 
## 
## The root mean square of the residuals (RMSR) is  0.16 
## The df corrected root mean square of the residuals is  0.21 
## 
## The harmonic number of observations is  447 with the empirical chi square  341.47  with prob <  4.1e-68 
## The total number of observations was  447  with Likelihood Chi Square =  412.96  with prob <  2.3e-83 
## 
## Tucker Lewis Index of factoring reliability =  0.744
## RMSEA index =  0.317  and the 90 % confidence intervals are  0.292 0.344
## BIC =  358.04
## Fit based upon off diagonal values = 0.93
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.99
## Multiple R square of scores with factors          0.98
## Minimum correlation of possible factor scores     0.96

REPRESENTACION GRAFICA DE LOS FACTORES EXTRAIDOS

Metodo de analisis de las componentes principales (ACP)

  # Plot the principal-component loadings, labelled with the variable names.
  plot(acp, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))

Metodo de las componentes principales iteradas (CPI)

  # Plot the principal-axis loadings, labelled with the variable names.
  plot(cpi, labels = rownames(r), cex = 0.7, ylim = c(-0.8, 0.8))

Metodo de la maxima verosimilitud (MVE)

  # Plot the maximum-likelihood loadings, labelled with the variable names.
  plot(mve, labels = rownames(r), cex = 1, ylim = c(-0.8, 0.8))

OBTENCION DE LAS PUNTUACIONES FACTORIALES

Metodo de analisis de las componentes principales (ACP)

  # Refit the one-component PCA on the standardised data rather than on the
  # correlation matrix, requesting component scores.
  acp1 <- principal(
    r = datost[, 2:7],
    nfactors = 1,
    rotate = "none",
    scores = TRUE
  )
  # Store the scores as a data frame.
  puntuacionesfactoriales_acp <- as.data.frame(acp1[["scores"]])

Metodo de las componentes principales iteradas (CPI)

  # Principal-axis (iterated principal components) fit on the standardised
  # data, with regression factor scores.
  # `fm = "pa"` was missing, so fa() used its default minimum-residual method
  # and these were not principal-axis scores.
  cpi1 <- fa(
    datost[, 2:7],
    nfactors = 1,
    n.obs = 447,
    rotate = "none",
    fm = "pa",
    scores = "regression"
  )

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

  # Store the factor scores from `cpi1` as a data frame.
  puntfact_cpi <- as.data.frame(cpi1[["scores"]])

Metodo de la maxima verosimilitud

  # Maximum-likelihood fit on the standardised data, with regression factor
  # scores.
  # `fm = "ml"` was missing, so this call was identical to the `cpi1` fit
  # (default minimum residual) and did not use maximum likelihood.
  mve1 <- fa(
    datost[, 2:7],
    nfactors = 1,
    n.obs = 447,
    rotate = "none",
    fm = "ml",
    scores = "regression"
  )

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

  # Store the factor scores from `mve1` as a data frame.
  puntfact_mve <- as.data.frame(mve1[["scores"]])

OBTENCION DE LOS FACTORES EXTRAIDOS

Aqui se trabaja con el metodo que el investigador decida (ACP, CPI, MVE).

  # Thurstone (regression) scoring weights for the ACP solution. Because a
  # correlation matrix is supplied instead of raw data, `$scores` comes back
  # NULL and only the weights are informative.
  factor.scores(x = r, f = acp, method = "Thurstone")

## $scores
## NULL
## 
## $weights
##                              PC1
## SO2                    0.1395761
## PM10                   0.1115274
## Reactivacion_Comercial 0.2571485
## Supermercado_Farmacia  0.2445143
## Parques_Centros        0.2080484
## Estaciones_Transito    0.2540099
## 
## $r.scores
##     PC1
## PC1   1
## 
## $R2
## [1] 1

AGREGAR FACTOR EXTRAIDO (PUNTUACIONES FACTORIALES) EN EL DATA FRAME ORIGINAL

  # Append the ACP score column(s) to the original (unstandardised) data:
  # c() joins the two column lists and as.data.frame() rebuilds a data frame.
  datos_puntuaciones <- as.data.frame(c(datos, puntuacionesfactoriales_acp))

Analisis Factorial

Roberto Rojas Wilson - Antonio Del Pardo Espinoza - Jesus Gabriel Valencia Camacho - Marco Antonio Irineo Inzunza

31/3/2022

IMPORTAR LA BASE DE DATOS

TIPIFICACION O ESTANDARIZACION DE VARIABLES

NORMALIDAD MULTIVARIANTE

MATRIZ DE CORRELACIONES

DETERMINACION DEL NUMERO DE FACTORES A EXTRAER

Metodo de las componentes principales iteradas

Metodo de las componentes principales

Metodo de la maxima verosimilitud

Metodo paralelo con iteraciones

EXTRACCION DE FACTORES

Metodo de Analisis de los componentes principales (ACP)

Metodo de los ejes principales o componentes principales iteradas (CPI)

Metodo de maxima verosimilitud (MVE)

REPRESENTACION GRAFICA DE LOS FACTORES EXTRAIDOS

Metodo de analisis de las componentes principales (ACP)

Metodo de las componentes principales iteradas (CPI)

Metodo de la maxima verosimilitud (MVE)

OBTENCION DE LAS PUNTUACIONES FACTORIALES

Metodo de analisis de las componentes principales (ACP)

Metodo de las componentes principales iteradas (CPI)

Metodo de la maxima verosimilitud

OBTENCION DE LOS FACTORES EXTRAIDOS

AGREGAR FACTOR EXTRAIDO (PUNTUACIONES FACTORIALES) EN EL DATA FRAME ORIGINAL