1.- Considera la siguiente matriz de correlaciones de 6 variables para 100 observaciones. Tratar de identificar factores no observables que estén relacionados con estas variables. \(\begin{array}{rrrrrr}1 & 0.83 & 0.81 & 0.8 & 0.71 & 0.54 \\ 0.83 & 1 & 0.87 & 0.62 & 0.59 & 0.58 \\ 0.81 & 0.87 & 1 & 0.63 & 0.37 & 0.3 \\ 0.8 & 0.62 & 0.63 & 1 & 0.49 & 0.3 \\ 0.71 & 0.59 & 0.37 & 0.49 & 1 & 0.34 \\ 0.54 & 0.58 & 0.3 & 0.3 & 0.34 & 1\end{array}\)

2.- Se tiene la tasa de retorno semanal de 5 acciones bursátiles del NYSE. Tratar de identificar factores no observables que estén relacionados con estas variables.

3.- Se colectaron datos sobre la contaminación del aire en cierta ciudad. Tratar de identificar factores no observables que estén relacionados con estas variables.

Ejercicio 1

# Report warnings as soon as they are raised instead of silencing them for
# the whole session; the original options(warn=-1) hid every warning,
# including any raised by the analysis calls further down.
options(warn = 1)
library(psych)
# Correlation matrix of the six variables from Exercise 1, entered one row
# per line so it can be compared with the matrix in the exercise statement.
R <- rbind(
  c(1.00, 0.83, 0.81, 0.80, 0.71, 0.54),
  c(0.83, 1.00, 0.87, 0.62, 0.59, 0.58),
  c(0.81, 0.87, 1.00, 0.63, 0.37, 0.30),
  c(0.80, 0.62, 0.63, 1.00, 0.49, 0.30),
  c(0.71, 0.59, 0.37, 0.49, 1.00, 0.34),
  c(0.54, 0.58, 0.30, 0.30, 0.34, 1.00)
)

# Bartlett's test of sphericity on R, with 100 observations.
# H0: every pairwise correlation is zero (R is an identity matrix).
# H1: at least one pairwise correlation differs from zero.
cortest.bartlett(R = R, n = 100)
## $chisq
## [1] 665.5447
## 
## $p.value
## [1] 4.066112e-132
## 
## $df
## [1] 15
# Se rechaza Ho (p-valor ≈ 4e-132): se infiere que sí hay correlación entre las variables

# Parallel analysis (scree plot) to find the sweet spot for the number of
# components, and hence of factors, worth retaining.
fa.parallel(
  x = R,
  n.obs = 100,
  fm = "pa",
  ylabel = "Eigenvalues"
)

## Parallel analysis suggests that the number of factors =  1  and the number of components =  1
#Se identifica que con los dos primeros componentes se explica aproximadamente el 80% de la variabilidad (Cumulative Var = 0.80), aunque el análisis paralelo sugiere uno solo.
# Two-component solution extracted from R without rotation.
acp <- principal(
  r = R,
  nfactors = 2,
  rotate = "none"
)
acp
## Principal Components Analysis
## Call: principal(r = R, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    PC1   PC2   h2    u2 com
## 1 0.97 -0.05 0.94 0.058 1.0
## 2 0.93  0.04 0.86 0.135 1.0
## 3 0.84 -0.32 0.81 0.190 1.3
## 4 0.80 -0.31 0.74 0.263 1.3
## 5 0.71  0.10 0.51 0.485 1.0
## 6 0.60  0.76 0.94 0.061 1.9
## 
##                        PC1  PC2
## SS loadings           4.01 0.79
## Proportion Var        0.67 0.13
## Cumulative Var        0.67 0.80
## Proportion Explained  0.83 0.17
## Cumulative Proportion 0.83 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.09 
## 
## Fit based upon off diagonal values = 0.98
# Same two-component solution for R, now with a varimax rotation.
acp2 <- principal(
  r = R,
  nfactors = 2,
  rotate = "varimax"
)
acp2
## Principal Components Analysis
## Call: principal(r = R, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##    RC1  RC2   h2    u2 com
## 1 0.88 0.42 0.94 0.058 1.4
## 2 0.80 0.48 0.86 0.135 1.6
## 3 0.89 0.12 0.81 0.190 1.0
## 4 0.85 0.11 0.74 0.263 1.0
## 5 0.58 0.42 0.51 0.485 1.8
## 6 0.16 0.96 0.94 0.061 1.1
## 
##                        RC1  RC2
## SS loadings           3.29 1.52
## Proportion Var        0.55 0.25
## Cumulative Var        0.55 0.80
## Proportion Explained  0.68 0.32
## Cumulative Proportion 0.68 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.09 
## 
## Fit based upon off diagonal values = 0.98
#Se determina que se puede trabajar con dos factores.
#Las variables 1, 2, 3 y 4 se pueden agrupar en el primer factor (RC1).
#La variable 6 iría en el segundo factor (RC2).
#Me queda la duda de que, al utilizar varimax, pareciera que la variable 6 va a quedar sola en su factor. ¿Es esto posible? Esperar resultados de la maestra.

Ejercicio 2

# Exercise 2 data: weekly rates of return of five NYSE stocks.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists. Consider a project-relative path.
P2 <- read.csv(file = 'C:/Users/Joel Rodarte/Desktop/T7_2.csv')
corP2 <- cor(P2)

# Bartlett's test of sphericity on the correlation matrix.
# H0: every pairwise correlation is zero.
# H1: at least one pairwise correlation differs from zero.
# The sample size is read from the data instead of being hard-coded, so the
# test stays correct if the CSV changes.
cortest.bartlett(corP2, n = nrow(P2))
## $chisq
## [1] 158.632
## 
## $p.value
## [1] 6.207325e-29
## 
## $df
## [1] 10
# Se infiere que si hay correlación entre las variables

# Multivariate and univariate normality checks for the stock returns.
library(MVN)
# The Henze-Zirkler result printed below does not reject multivariate
# normality for these data.
mvn(data = P2)
## $multivariateNormality
##            Test        HZ   p value MVN
## 1 Henze-Zirkler 0.9423938 0.1490478 YES
## 
## $univariateNormality
##               Test        Variable Statistic   p value Normality
## 1 Anderson-Darling Allied.Chemical    0.3032    0.5673    YES   
## 2 Anderson-Darling     Du.Pont        0.7152    0.0601    YES   
## 3 Anderson-Darling  Union.Carbide     0.4967    0.2081    YES   
## 4 Anderson-Darling      Exxon         0.8761    0.0239    NO    
## 5 Anderson-Darling     Texaco         0.4896    0.2167    YES   
## 
## $Descriptives
##                   n       Mean    Std.Dev     Median       Min      Max
## Allied.Chemical 100 0.00543372 0.04037235  0.0000000 -0.096654 0.122807
## Du.Pont         100 0.00482707 0.03506246 -0.0024895 -0.075758 0.118812
## Union.Carbide   100 0.00565418 0.03944713  0.0000000 -0.091463 0.102616
## Exxon           100 0.00629143 0.02832547  0.0032590 -0.053133 0.088549
## Texaco          100 0.00370852 0.02754518  0.0045765 -0.050505 0.082474
##                        25th       75th      Skew    Kurtosis
## Allied.Chemical -0.01988250 0.03146875 0.2001254 -0.09307684
## Du.Pont         -0.01932550 0.02443875 0.5468161  0.34697628
## Union.Carbide   -0.01788425 0.02890450 0.1518975 -0.01779615
## Exxon           -0.01389625 0.02088050 0.6120059  0.36646575
## Texaco          -0.01525350 0.01896225 0.5035896  0.19129955
# Principal component analysis to choose how many components (and hence
# factors) to retain, using roughly 80% cumulative variance as the cut-off.
# The variables are standardised (scale. = TRUE) so this PCA works on the
# correlation matrix, the same matrix passed to principal() below; the
# original call prcomp(P2) decomposed the covariance matrix instead.
# On the correlation scale three components are still needed to pass 80%
# (Cumulative Var = 0.84 in the unrotated principal() output further down).
acp_corr <- prcomp(P2, scale. = TRUE)
summary(acp_corr)
# NOTE: the table below was printed by the earlier, unscaled prcomp(P2);
# re-run the two lines above to refresh it.
## Importance of components:
##                            PC1     PC2     PC3     PC4     PC5
## Standard deviation     0.05996 0.02815 0.02714 0.02255 0.01854
## Proportion of Variance 0.60159 0.13255 0.12322 0.08511 0.05752
## Cumulative Proportion  0.60159 0.73414 0.85737 0.94248 1.00000
# Three-component solution extracted from corP2 without rotation.
acp <- principal(
  r = corP2,
  nfactors = 3,
  rotate = "none"
)
acp
## Principal Components Analysis
## Call: principal(r = corP2, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  PC1   PC2   PC3   h2   u2 com
## Allied.Chemical 0.78 -0.22 -0.45 0.86 0.14 1.8
## Du.Pont         0.77 -0.46  0.13 0.82 0.18 1.7
## Union.Carbide   0.79 -0.23  0.25 0.75 0.25 1.4
## Exxon           0.71  0.47  0.40 0.89 0.11 2.4
## Texaco          0.71  0.52 -0.32 0.88 0.12 2.3
## 
##                        PC1  PC2  PC3
## SS loadings           2.86 0.81 0.54
## Proportion Var        0.57 0.16 0.11
## Cumulative Var        0.57 0.73 0.84
## Proportion Explained  0.68 0.19 0.13
## Cumulative Proportion 0.68 0.87 1.00
## 
## Mean item complexity =  1.9
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
## 
## Fit based upon off diagonal values = 0.96
# Same three-component solution for corP2, now with a varimax rotation.
acp2 <- principal(
  r = corP2,
  nfactors = 3,
  rotate = "varimax"
)
acp2
## Principal Components Analysis
## Call: principal(r = corP2, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  RC1   RC2  RC3   h2   u2 com
## Allied.Chemical 0.59 -0.01 0.72 0.86 0.14 1.9
## Du.Pont         0.88  0.13 0.18 0.82 0.18 1.1
## Union.Carbide   0.77  0.36 0.16 0.75 0.25 1.5
## Exxon           0.27  0.89 0.18 0.89 0.11 1.3
## Texaco          0.07  0.54 0.77 0.88 0.12 1.8
## 
##                        RC1  RC2  RC3
## SS loadings           1.79 1.22 1.20
## Proportion Var        0.36 0.24 0.24
## Cumulative Var        0.36 0.60 0.84
## Proportion Explained  0.43 0.29 0.28
## Cumulative Proportion 0.43 0.72 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.1 
## 
## Fit based upon off diagonal values = 0.96
# Se determina que Du Pont y Union Carbide forman el primer factor.
# Exxon es el segundo factor.
# Allied Chemical y Texaco forman el tercer factor.

Ejercicio 3

# Exercise 3 data: air-pollution measurements collected in a city.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this file exists. Consider a project-relative path.
P3 <- read.csv(file = 'C:/Users/Joel Rodarte/Desktop/T7_3.csv')
corP3 <- cor(P3)

# Bartlett's test of sphericity on the correlation matrix.
# H0: every pairwise correlation is zero.
# H1: at least one pairwise correlation differs from zero.
# The sample size is read from the data instead of being hard-coded, so the
# test stays correct if the CSV changes.
cortest.bartlett(corP3, n = nrow(P3))
## $chisq
## [1] 70.52667
## 
## $p.value
## [1] 2.892456e-07
## 
## $df
## [1] 21
# Se infiere que si hay correlación entre las variables

# Multivariate and univariate normality checks for the air-pollution data.
library(MVN)
mvn(data = P3)
## $multivariateNormality
##            Test        HZ    p value MVN
## 1 Henze-Zirkler 0.9828227 0.03504611  NO
## 
## $univariateNormality
##               Test        Variable Statistic   p value Normality
## 1 Anderson-Darling      wind          0.9891  0.0118      NO    
## 2 Anderson-Darling solar.radiation    0.9623  0.0137      NO    
## 3 Anderson-Darling       CO           2.3631  <0.001      NO    
## 4 Anderson-Darling       NO           2.1283  <0.001      NO    
## 5 Anderson-Darling       NO2          0.6975  0.0636      YES   
## 6 Anderson-Darling       O3           1.2030  0.0034      NO    
## 7 Anderson-Darling       HC           3.9150  <0.001      NO    
## 
## $Descriptives
##                  n      Mean    Std.Dev Median Min Max  25th  75th        Skew
## wind            42  7.500000  1.5811388    8.0   5  10  6.00  8.75  0.03614032
## solar.radiation 42 73.857143 17.3353881   76.5  30 107 68.25 84.75 -0.73323765
## CO              42  4.547619  1.2337209    4.0   2   7  4.00  5.00  0.57793497
## NO              42  2.190476  1.0873574    2.0   1   5  1.00  3.00  0.62983752
## NO2             42 10.047619  3.3709837    9.5   5  21  8.00 12.00  0.98543943
## O3              42  9.404762  5.5658345    8.5   2  25  6.00 11.00  1.13370267
## HC              42  3.095238  0.6917466    3.0   2   5  3.00  3.00  0.31349052
##                    Kurtosis
## wind            -1.10809524
## solar.radiation  0.30901245
## CO              -0.25497268
## NO              -0.52220171
## NO2              1.22197318
## O3               1.02388767
## HC               0.04343283
#Los datos no son normales multivariados

# Principal component analysis of the air-pollution data to decide how many
# components (and hence factors) to retain.
# The original code printed summary(acp_corr), i.e. the PCA of the Exercise 2
# stock returns, so its five-component table said nothing about P3. The PCA
# is now fitted on P3 itself, standardised (scale. = TRUE) to match the
# correlation matrix passed to principal() below.
# NOTE(review): the earlier remark that one component captures 80% and two
# capture 95% is not backed by any output in this file; presumably it came
# from an unscaled prcomp(P3) - confirm. On the correlation scale the two
# retained components explain 53% (Cumulative Var in the principal() output
# below). Two components are kept so the variables can be told apart.
acp_p3 <- prcomp(P3, scale. = TRUE)
summary(acp_p3)
# NOTE: re-run to obtain the importance table for P3; the table previously
# shown here was the Exercise 2 output.
# Two-component solution for corP3 with a varimax rotation.
acp3 <- principal(
  r = corP3,
  nfactors = 2,
  rotate = "varimax"
)
acp3
## Principal Components Analysis
## Call: principal(r = corP3, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                   RC1   RC2   h2   u2 com
## wind            -0.12 -0.47 0.24 0.76 1.1
## solar.radiation -0.08  0.69 0.48 0.52 1.0
## CO               0.70  0.47 0.71 0.29 1.7
## NO               0.76 -0.11 0.60 0.40 1.0
## NO2              0.77  0.22 0.63 0.37 1.2
## O3               0.05  0.83 0.69 0.31 1.0
## HC               0.61 -0.04 0.37 0.63 1.0
## 
##                        RC1  RC2
## SS loadings           2.05 1.67
## Proportion Var        0.29 0.24
## Cumulative Var        0.29 0.53
## Proportion Explained  0.55 0.45
## Cumulative Proportion 0.55 1.00
## 
## Mean item complexity =  1.2
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.15 
## 
## Fit based upon off diagonal values = 0.68
# Se determina que CO, NO, NO2 y HC pertenecen al primer factor.
# Wind, solar radiation y O3 pertenecen al segundo factor.