ExercÃcio 28
#Item a:
library(Matrix)
## Warning: package 'Matrix' was built under R version 3.4.4
A<- matrix(c(3,2,3,2,2,5,1,1,3,1,8,2,2,1,2,3),4,4,byrow=T)
# Encontrando B, a matriz ortogonal composta pelos autovetores normalizados
# de A.
B <- eigen(A)
B$vectors
## [,1] [,2] [,3] [,4]
## [1,] -0.447 0.177 0.263 0.836
## [2,] -0.327 0.838 -0.365 -0.238
## [3,] -0.760 -0.507 -0.362 -0.185
## [4,] -0.339 0.093 0.817 -0.457
# Verificando que o produto B*B' é a matriz identidade
Identidade <- round( tcrossprod(B$vectors),3)
Identidade
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
# Calculando a matriz diagonal D
D = diag(B$values,4,4)
D
## [,1] [,2] [,3] [,4]
## [1,] 11.1 0.00 0.00 0.000
## [2,] 0.0 4.93 0.00 0.000
## [3,] 0.0 0.00 2.31 0.000
## [4,] 0.0 0.00 0.00 0.673
# Verificando que B e D reproduzem A
# Reprodução de A pela decomposicao expectral
A_=B$vectors %*% D %*% t(B$vectors)
A_
## [,1] [,2] [,3] [,4]
## [1,] 3 2 3 2
## [2,] 2 5 1 1
## [3,] 3 1 8 2
## [4,] 2 1 2 3
# Item b
# Obtendo a raiz quadrada da matriz A
# Solução: Utilizamos a matriz ortogonal B e também a matriz D
raiz2_D =diag(sqrt(B$values),4,4)
raiz2_A = B$vectors %*% raiz2_D %*% t(B$vectors)
raiz2_A
## [,1] [,2] [,3] [,4]
## [1,] 1.414 0.508 0.660 0.554
## [2,] 0.508 2.167 0.122 0.179
## [3,] 0.660 0.122 2.722 0.374
## [4,] 0.554 0.179 0.374 1.588
## Mostrando que o quadrado da raiz quadrada de A resulta na matriz original A
raiz2_A %*% raiz2_A
## [,1] [,2] [,3] [,4]
## [1,] 3 2 3 2
## [2,] 2 5 1 1
## [3,] 3 1 8 2
## [4,] 2 1 2 3
# Item C
InversaB= diag(1/sqrt(B$values),4,4)
InversadaRaiz_A=B$vectors %*% InversaB %*% t(B$vectors)
InversadaRaiz_A
## [,1] [,2] [,3] [,4]
## [1,] 0.973 -0.19495 -0.1897 -0.27239
## [2,] -0.195 0.50549 0.0239 0.00529
## [3,] -0.190 0.02387 0.4171 -0.03483
## [4,] -0.272 0.00529 -0.0348 0.73234
# Mostrando que a multiplicação da inversa da raiz de A pela matriz raiz de A
# resulta na matriz original A
round(InversadaRaiz_A %*% raiz2_A,3)
## [,1] [,2] [,3] [,4]
## [1,] 1 0 0 0
## [2,] 0 1 0 0
## [3,] 0 0 1 0
## [4,] 0 0 0 1
#---------------------FIM QUESTAO 28-LISTA 4--------------
census <- read.delim("T8-5.Dat.txt",sep="",header=F)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
census0 <- tbl_df(census)
census<-tbl_df(cbind(cbind(census$V1,census$V2,census$V3,census$V4,10*census$V5)))
# Item a: Obtendo a matriz de covariância
cov_census<-tbl_df(round(cov(census),3))
cov_census
## # A tibble: 5 x 5
## V1 V2 V3 V4 V5
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.40 -1.10 4.31 -2.08 0.272
## 2 -1.10 9.67 -1.51 11.0 12.0
## 3 4.31 -1.51 55.6 -28.9 -0.436
## 4 -2.08 11.0 -28.9 89.1 9.57
## 5 0.272 12.0 -0.436 9.57 31.9
# Item b: Obtendo o par autovalor-autovetor associado à matriz census
eigen<-eigen(cov_census)
# Autovalores
eigen$values
## [1] 108.27 43.14 31.27 4.60 2.35
# Autovetores
eigen$vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.0376 -0.0623 -0.040 0.5555 0.8273
## [2,] -0.1189 -0.2493 0.261 -0.7684 0.5152
## [3,] 0.4797 -0.7597 -0.431 -0.0281 -0.0810
## [4,] -0.8589 -0.3164 -0.394 0.0687 -0.0499
## [5,] -0.1289 -0.5067 0.768 0.3090 -0.2026
cpcensus <- prcomp(~ V1+V2+V3+V4+V5,
data=census, scale = F)
# Extraindo as 2 primeiras componentes principais da matrix de covariância
as.matrix(cbind(cpcensus$rotation[,1],cpcensus$rotation[,2]))
## [,1] [,2]
## V1 -0.0376 0.0623
## V2 0.1189 0.2493
## V3 -0.4797 0.7597
## V4 0.8589 0.3164
## V5 0.1289 0.5067
# A proporção da variância explicada pelas 2 primeiras componentes principais
# é dada pela relação
(eigen$values[1]+eigen$values[2])/sum(diag(cov(census)))*100
## [1] 79.8
# A proporção explicada pelas duas primeiras componentes principais mostrou-se # menor que o resultado do exercÃcio 8.3. Isso mostra que ao multiplicar a
# coluna 5 pelo fator 10 aumenta a importância relativa dessa variável na
# composição da componente principal, resultando na diminuição relativa das
# outras variáveis na composição da componente principal.
#
radiotherapy <- read.delim("T1-7.Dat.txt",sep="",header=F)
radiotherapy<-tbl_df(radiotherapy)
# Item a.1: Calculando a matriz de covariância
cov(radiotherapy)
## V1 V2 V3 V4 V5 V6
## V1 4.655 0.9313 0.590 0.2769 1.07489 0.15815
## V2 0.931 0.6128 0.111 0.1185 0.38889 -0.02485
## V3 0.590 0.1109 0.571 0.0870 0.34799 0.11013
## V4 0.277 0.1185 0.087 0.1104 0.21741 0.02181
## V5 1.075 0.3889 0.348 0.2174 0.86217 -0.00882
## V6 0.158 -0.0249 0.110 0.0218 -0.00882 0.86146
# Item a.2: Calculando a matriz de correlação
cor(radiotherapy)
## V1 V2 V3 V4 V5 V6
## V1 1.000 0.5514 0.362 0.3863 0.5366 0.0790
## V2 0.551 1.0000 0.187 0.4554 0.5350 -0.0342
## V3 0.362 0.1875 1.000 0.3464 0.4958 0.1570
## V4 0.386 0.4554 0.346 1.0000 0.7046 0.0707
## V5 0.537 0.5350 0.496 0.7046 1.0000 -0.0102
## V6 0.079 -0.0342 0.157 0.0707 -0.0102 1.0000
#Item B: Escolhemos a matriz R, pois ao analisar a proporção de explicação da
# variância das matrizes S e R, verifica-se que a matriz S tem maior
# concentração da primeira componente principal, enquanto que na matriz R esse # valor é mais diluido, conforme cálculos abaixo:
# Porcentagem da variância total da matriz de Variância
S_eigen<-eigen(cov(radiotherapy))
S_eigen$values[1]/sum(S_eigen$values)
## [1] 0.689
# Porcentagem da variância total da matriz de correlação
R_eigen<-eigen(cor(radiotherapy))
R_eigen$values[1]/sum(R_eigen$values)
## [1] 0.477
sum_diag<-sum(diag(cor(census)))
R_eigenvalue1_percent<-round(R_eigen$values[1]/sum_diag*100,2)
R_eigenvalue2_percent<-round(R_eigen$values[2]/sum_diag*100,2)
R_eigenvalue3_percent<-round(R_eigen$values[3]/sum_diag*100,2)
R_eigenvalue4_percent<-round(R_eigen$values[4]/sum_diag*100,2)
R_eigenvalue5_percent<-round(R_eigen$values[5]/sum_diag*100,2)
# Tabela com a participação dos autovalores na variância total
contribution<-matrix(c(R_eigenvalue1_percent,R_eigenvalue2_percent,R_eigenvalue3_percent,R_eigenvalue4_percent,R_eigenvalue5_percent),ncol=5,byrow=TRUE)
colnames(contribution)<-c("Autovalor 1","Autovalor 2","Autovalor 3","Autovalor 4","Autovalor 5")
contribution
## Autovalor 1 Autovalor 2 Autovalor 3 Autovalor 4 Autovalor 5
## [1,] 57.3 21.5 15.6 13 7.76
# Item c:
# Resposta: Não se mostra interessante sumarizar os dados com apenas a
# primeira componente principal, já que a participação na variância total,
# ainda que alta(57,29%), necessita de mais outra componente para que a
# componentes tragam um percentual mais alto de explicação dos dados.
Lista 5: Análise Fatorial
ExercÃcio 38-(9.10 Johnson e Wichern)
# Reproduzindo os resultados obtidos para os valores estimados dos loadings
library(psych)
## Warning: package 'psych' was built under R version 3.4.4
R <- matrix(c(1.000,0.505,0.569,0.602,0.621,0.603,
0.505,1.000,0.422,0.467,0.482,0.450,
0.569,0.422,1.000,0.926,0.877,0.878,
0.602,0.467,0.926,1.000,0.874,0.894,
0.621,0.482, 0.877, 0.874, 1.000, 0.937,
0.603, 0.450, 0.878, 0.894, 0.937, 1.000)
,6,6)
factor_loadings<-fa(R,nfactors = 2,fm="ml",rotate="varimax")
factor_loadings
## Factor Analysis using method = ml
## Call: fa(r = R, nfactors = 2, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 h2 u2 com
## 1 0.48 0.41 0.40 0.5981 2.0
## 2 0.37 0.32 0.24 0.7586 1.9
## 3 0.60 0.72 0.88 0.1204 1.9
## 4 0.52 0.85 1.00 0.0050 1.7
## 5 0.86 0.50 0.99 0.0062 1.6
## 6 0.74 0.60 0.91 0.0949 1.9
##
## ML2 ML1
## SS loadings 2.31 2.11
## Proportion Var 0.38 0.35
## Cumulative Var 0.38 0.74
## Proportion Explained 0.52 0.48
## Cumulative Proportion 0.52 1.00
##
## Mean item complexity = 1.8
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 15 and the objective function was 6.75
## The degrees of freedom for the model are 4 and the objective function was 0.1
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.1
##
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML2 ML1
## Correlation of (regression) scores with factors 0.99 0.99
## Multiple R square of scores with factors 0.98 0.98
## Minimum correlation of possible factor scores 0.95 0.95
# Item D:
loadings<-matrix(c(0.602,0.200,0.467,0.154,0.926,0.143,1,0,0.874,0.476,0.894,0.327),6,2,byrow=TRUE)
matriz_vari_especifica<-diag(c(0.597,0.758,0.122,0,0.0095,0.093))
matriz_vari_especifica
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.597 0.000 0.000 0 0.0000 0.000
## [2,] 0.000 0.758 0.000 0 0.0000 0.000
## [3,] 0.000 0.000 0.122 0 0.0000 0.000
## [4,] 0.000 0.000 0.000 0 0.0000 0.000
## [5,] 0.000 0.000 0.000 0 0.0095 0.000
## [6,] 0.000 0.000 0.000 0 0.0000 0.093
Residuo=R-loadings %*% t(loadings) - matriz_vari_especifica
Residuo
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.000596 0.193066 -0.017052 0 -0.000348 -0.000588
## [2,] 0.193066 0.000195 -0.032464 0 0.000538 -0.017856
## [3,] -0.017052 -0.032464 0.000075 0 -0.000392 0.003395
## [4,] 0.000000 0.000000 0.000000 0 0.000000 0.000000
## [5,] -0.000348 0.000538 -0.000392 0 0.000048 -0.000008
## [6,] -0.000588 -0.017856 0.003395 0 -0.000008 0.000835
Lista 6
ExercÃcio 42
mu <- c(3,2)
#Sigma <- matrix(c(1, -1.5, -1.5,4), nrow = 2, ncol = 2)
Sigma <- matrix(c(2,0,0,2), nrow = 2, ncol = 2)
# Gerando a normal bivariada por decomposição espectral
rmvn.eigen <-
function(n, mu, Sigma) {
p <- length(mu)
ev <- eigen(Sigma, symmetric = TRUE)
lambda <- ev$values
V <- ev$vectors
R <- V %*% diag(sqrt(lambda)) %*% t(V)
Z <- matrix(rnorm(n*p), nrow = n, ncol = p)
X <- Z %*% R + matrix(mu, n, p, byrow = TRUE)
X
}
# GERANDO AS AMOSTRAS
a1 <- rmvn.eigen(1000, mu, Sigma)
plot(a1, xlab = "x", ylab = "y", pch = 20,xlim=c(0,10),ylim=c(-4,6))
print(colMeans(a1))
## [1] 2.94 1.93
print(cor(a1))
## [,1] [,2]
## [1,] 1.00000 -0.00485
## [2,] -0.00485 1.00000
library(car)
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
dataEllipse(a1,pch=20, main = "Decomposicao Espectral",xlim=c(0,6),ylim=c(-4,8),levels=c(0.5,0.95))
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.