Componentes Principais:Exemplo aplicação
# install.packages("Matrix")
# Exemplo 1: MMDS.ORG
S <- matrix(c(1,-2,0,-2,5,0,0,0,2),3,3)
eigS <- eigen(S)
Svec <- round(eigS$vectors,digits=3)
Sval <- round(eigS$values,digits=3)
eigS
## eigen() decomposition
## $values
## [1] 5.8284271 2.0000000 0.1715729
##
## $vectors
## [,1] [,2] [,3]
## [1,] -0.3826834 0 0.9238795
## [2,] 0.9238795 0 0.3826834
## [3,] 0.0000000 1 0.0000000
Svec
## [,1] [,2] [,3]
## [1,] -0.383 0 0.924
## [2,] 0.924 0 0.383
## [3,] 0.000 1 0.000
Sval
## [1] 5.828 2.000 0.172
# Verificando que o traço da matriz de variância-covariância é igual à soma dos autovalores.
sum(diag(S)) # Traço da matriz sigma
## [1] 8
sum(Sval) # Soma dos autovalores
## [1] 8
round(cumsum(Sval)/sum(Sval)*100, digits=2)
## [1] 72.85 97.85 100.00
# Biplot com dados de Greenacre
# Exemplo explicado pelo paper da Fundación BBVA
xm <- matrix(c(2,2,1,2,-1,1,1,-1,2,-2),5,2,byrow=T)
ym <- matrix(c(3,2,-1,-2,1,-1,2,-1),2,4,byrow=T)
colnames(ym) <- c("y1","y2","y3","y4")
rownames(xm) <- c("x1","x2","x3","x4","x5")
biplot(xm,t(ym), col=c("darkgreen","red"),
cex=1.2, expand = 2,
xlim = c(-3,4), ylim = c(-3,3))
abline(v=0,h=0,lty=3, col = "black" )
dm <- matrix(c(8,2,2,-6,
5,0,3,-4,
-2,-3,3,1,
2,3,-3,-1,
4,6,-6,-2),5,4,byrow=T)
biplot(prcomp(dm))
abline(v=0,h=0)
# Exemplo 8.10 de J&W: Taxas de retorno semanais
setwd("C:/Users/ricardo.ramos/OneDrive - Ministerio do Desenvolvimento da Industria e Comercio Exterior/UNB/Multivariada/R-Multivariada")
stock <- read.delim("T8-4.txt",header=F)
stock[1:5,]
## V1 V2 V3 V4 V5
## 1 0.0130338 -0.0078431 -0.0031889 -0.0447693 0.0052151
## 2 0.0084862 0.0166886 -0.0062100 0.0119560 0.0134890
## 3 -0.0179153 -0.0086393 0.0100360 0.0000000 -0.0061428
## 4 0.0215589 -0.0034858 0.0174353 -0.0285917 -0.0069534
## 5 0.0108225 0.0037167 -0.0101345 0.0291900 0.0409751
#library(data.table)
nomes <- c("JPMorgan", "Citibank", "WellsFargo",
"Shell", "Exxon")
setnames(stock,nomes)
stock[1:5,]
## JPMorgan Citibank WellsFargo Shell Exxon
## 1 0.0130338 -0.0078431 -0.0031889 -0.0447693 0.0052151
## 2 0.0084862 0.0166886 -0.0062100 0.0119560 0.0134890
## 3 -0.0179153 -0.0086393 0.0100360 0.0000000 -0.0061428
## 4 0.0215589 -0.0034858 0.0174353 -0.0285917 -0.0069534
## 5 0.0108225 0.0037167 -0.0101345 0.0291900 0.0409751
cpstock <- prcomp(~ JPMorgan + Citibank + WellsFargo
+ Shell + Exxon,
data=stock, scale = F)
plot(cpstock)
screeplot(cpstock,type=c("lines"))
summary(cpstock)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 0.03698 0.02648 0.01593 0.01194 0.01090
## Proportion of Variance 0.52926 0.27133 0.09822 0.05518 0.04601
## Cumulative Proportion 0.52926 0.80059 0.89881 0.95399 1.00000
cpstock
## Standard deviations (1, .., p=5):
## [1] 0.03698213 0.02647942 0.01593118 0.01194163 0.01090352
##
## Rotation (n x k) = (5 x 5):
## PC1 PC2 PC3 PC4 PC5
## JPMorgan -0.2228228 0.6252260 -0.32611218 0.6627590 -0.11765952
## Citibank -0.3072900 0.5703900 0.24959014 -0.4140935 0.58860803
## WellsFargo -0.1548103 0.3445049 0.03763929 -0.4970499 -0.78030428
## Shell -0.6389680 -0.2479475 0.64249741 0.3088689 -0.14845546
## Exxon -0.6509044 -0.3218478 -0.64586064 -0.2163758 0.09371777
cpstock$rotation
## PC1 PC2 PC3 PC4 PC5
## JPMorgan -0.2228228 0.6252260 -0.32611218 0.6627590 -0.11765952
## Citibank -0.3072900 0.5703900 0.24959014 -0.4140935 0.58860803
## WellsFargo -0.1548103 0.3445049 0.03763929 -0.4970499 -0.78030428
## Shell -0.6389680 -0.2479475 0.64249741 0.3088689 -0.14845546
## Exxon -0.6509044 -0.3218478 -0.64586064 -0.2163758 0.09371777
biplot(cpstock,col=c("black","red"))
## V1 V2 V3 V4 V5
## 1 2.67 5.71 69.02 30.3 1.48
## 2 2.25 4.37 72.98 43.3 1.44
## 3 3.12 10.27 64.94 32.0 2.11
## 4 5.14 7.44 71.29 24.5 1.85
## 5 5.54 9.25 74.94 31.0 2.23
## Population Degree Employed Government Median
## 1 2.67 5.71 69.02 30.3 1.48
## 2 2.25 4.37 72.98 43.3 1.44
## 3 3.12 10.27 64.94 32.0 2.11
## 4 5.14 7.44 71.29 24.5 1.85
## 5 5.54 9.25 74.94 31.0 2.23
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 10.345 6.2986 2.89324 1.69348 0.39331
## Proportion of Variance 0.677 0.2510 0.05295 0.01814 0.00098
## Cumulative Proportion 0.677 0.9279 0.98088 0.99902 1.00000
## Standard deviations (1, .., p=5):
## [1] 10.3448177 6.2985820 2.8932449 1.6934798 0.3933104
##
## Rotation (n x k) = (5 x 5):
## PC1 PC2 PC3 PC4 PC5
## Population 0.038887287 -0.07114494 0.18789258 0.97713524 -0.057699864
## Degree -0.105321969 -0.12975236 -0.96099580 0.17135181 -0.138554092
## Employed 0.492363944 -0.86438807 0.04579737 -0.09104368 0.004966048
## Government -0.863069865 -0.48033178 0.15318538 -0.02968577 0.006691800
## Median -0.009122262 -0.01474342 -0.12498114 0.08170118 0.988637470
## PC1 PC2 PC3 PC4 PC5
## Population 0.038887287 -0.07114494 0.18789258 0.97713524 -0.057699864
## Degree -0.105321969 -0.12975236 -0.96099580 0.17135181 -0.138554092
## Employed 0.492363944 -0.86438807 0.04579737 -0.09104368 0.004966048
## Government -0.863069865 -0.48033178 0.15318538 -0.02968577 0.006691800
## Median -0.009122262 -0.01474342 -0.12498114 0.08170118 0.988637470
## Exemplo 8.18 de J&W: Recordes nacionais para provas de corrida - mulheres.
setwd("C:/Users/ricardo.ramos/OneDrive - Ministerio do Desenvolvimento da Industria e Comercio Exterior/UNB/Multivariada/R-Multivariada")
library(Matrix)
recor <- read.delim("T1-9.dat",header=F)
recor[1:5,]
## V1 V2 V3 V4 V5 V6 V7 V8
## 1 ARG 11.57 22.94 52.50 2.05 4.25 9.19 150.32
## 2 AUS 11.12 22.23 48.63 1.98 4.02 8.63 143.51
## 3 AUT 11.15 22.70 50.62 1.94 4.05 8.78 154.35
## 4 BEL 11.14 22.48 51.45 1.97 4.08 8.82 143.05
## 5 BER 11.46 23.05 53.30 2.07 4.29 9.81 174.18
nomes <- c("Pais","v100m","v200m","v400m","v800m",
"v1500m","v3000m","Maratona")
setnames(recor,nomes)
recor[1:5,]
## Pais v100m v200m v400m v800m v1500m v3000m Maratona
## 1 ARG 11.57 22.94 52.50 2.05 4.25 9.19 150.32
## 2 AUS 11.12 22.23 48.63 1.98 4.02 8.63 143.51
## 3 AUT 11.15 22.70 50.62 1.94 4.05 8.78 154.35
## 4 BEL 11.14 22.48 51.45 1.97 4.08 8.82 143.05
## 5 BER 11.46 23.05 53.30 2.07 4.29 9.81 174.18
#var(recor[,2:8])
#cor(recor[,2:8])
cprecor <- prcomp(~ v100m + v200m + v400m + v800m + v1500m
+ v3000m + Maratona,
data=recor, scale = T)
attributes(cprecor)
## $names
## [1] "sdev" "rotation" "center" "scale" "x" "call"
##
## $class
## [1] "prcomp"
plot(cprecor) # ou screeplot(cprecor,type=c("lines"))
plot(cumsum(cprecor$sdev**2)/sum(cprecor$sdev**2),
type="b", ylim = c(0.8,1),
xlab="CPs", ylab="Proporção da Variância")
summary(cprecor)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.4099 0.79290 0.5285 0.35292 0.3016 0.23349
## Proportion of Variance 0.8297 0.08981 0.0399 0.01779 0.0130 0.00779
## Cumulative Proportion 0.8297 0.91947 0.9594 0.97717 0.9902 0.99796
## PC7
## Standard deviation 0.11959
## Proportion of Variance 0.00204
## Cumulative Proportion 1.00000
cprecor
## Standard deviations (1, .., p=7):
## [1] 2.4099013 0.7929019 0.5285211 0.3529231 0.3016152 0.2334927 0.1195921
##
## Rotation (n x k) = (7 x 7):
## PC1 PC2 PC3 PC4 PC5
## v100m -0.3777657 0.4071756 0.1405803 -0.58706293 0.16706891
## v200m -0.3832103 0.4136291 0.1007833 -0.19407501 -0.09350016
## v400m -0.3680361 0.4593531 -0.2370255 0.64543118 -0.32727328
## v800m -0.3947810 -0.1612459 -0.1475424 0.29520804 0.81905467
## v1500m -0.3892610 -0.3090877 0.4219855 0.06669044 -0.02613100
## v3000m -0.3760945 -0.4231899 0.4060627 0.08015699 -0.35169796
## Maratona -0.3552031 -0.3892153 -0.7410610 -0.32107640 -0.24700821
## PC6 PC7
## v100m -0.53969730 -0.08893934
## v200m 0.74493139 0.26565662
## v400m -0.24009405 -0.12660435
## v800m 0.01650651 0.19521315
## v1500m 0.18898771 -0.73076817
## v3000m -0.24049968 0.57150644
## Maratona 0.04826992 -0.08208401
biplot(cprecor,col=c("black","red"),
xlabs=recor$Pais,
cex=c(1,1.2))
cprecor$x[,1]
## 1 2 3 4 5
## -0.393240234 1.931642887 1.262520373 1.291730279 -1.396108552
## 6 7 8 9 10
## 1.006778878 1.734340591 -0.811838204 2.989466907 -0.001927672
## 11 12 13 14 15
## -7.906227224 -2.166811506 2.406030321 0.082495533 -2.192409809
## 16 17 18 19 20
## 1.266731340 2.518345696 3.047516603 2.442706280 1.197800425
## 21 22 23 24 25
## -3.294123799 0.788251063 -1.741942057 0.354256642 1.035907216
## 26 27 28 29 30
## -0.574161730 1.547452839 0.481657610 0.917735409 -0.830794629
## 31 32 33 34 35
## -1.455347346 -1.721467731 -1.495210140 -1.749727754 0.995766285
## 36 37 38 39 40
## -0.815981458 1.544760622 0.755235487 0.553003461 -5.257449747
## 41 42 43 44 45
## -1.763533682 2.273765780 1.175249957 2.123005711 3.042948214
## 46 47 48 49 50
## -8.213415123 -3.093919517 1.889462264 0.839149567 1.113545239
## 51 52 53 54
## -0.659093139 -1.223805050 0.850127798 3.299148823
# Verificando a ordem dos Países em relação à 1ª componente principal
recor$Pais[order(cprecor$x[,1],decreasing = T)[recor$Pais]]
## [1] USA GER RUS CHN FRA GBR CZE POL ROM ESP
## [11] AUS CAN ITA NED BEL AUT GRE SUI POR IRL
## [21] BRA MEX KEN TUR SWE HUN NZL NOR JPN DEN
## [31] IND COL ARG TPE ISR CHI MYA THA KOR, S KOR, N
## [41] BER MAS LUX INA MRI PHI CRC FIN SIN DOM
## [51] PNG GUA COK SAM
## 54 Levels: ARG AUS AUT BEL BER BRA CAN CHI CHN COK COL CRC CZE DEN ... USA
# Verificando a ordem dos Países em relação à 2ª componente principal
recor$Pais[order(cprecor$x[,2],decreasing = T)[recor$Pais]]
## [1] KOR, N KEN LUX NOR SIN CHI IRL KOR, S DEN JPN
## [11] POR MYA HUN TUR GUA MRI COK NZL INA ROM
## [21] NED ITA ARG GBR BEL SWE CHN AUT ISR IND
## [31] ESP CAN CRC FIN BRA DOM RUS AUS MEX MAS
## [41] POL PHI CZE BER GRE THA GER SUI TPE COL
## [51] USA FRA PNG SAM
## 54 Levels: ARG AUS AUT BEL BER BRA CAN CHI CHN COK COL CRC CZE DEN ... USA
# Analisando a relação entre a 1ª CP e a 3ª CP
biplot(cprecor, choices=c(1,3))
setwd("C:/Users/ricardo.ramos/OneDrive - Ministerio do Desenvolvimento da Industria e Comercio Exterior/UNB/Multivariada/R-Multivariada")
empresas <- read.delim("MingotiTB3.1.tsv",header=T)
empresas <- tbl_df(empresas)
filter_empresas<-empresas %>% select(gbt,glq,patr)
# Matriz de covariância
cov(filter_empresas)
## gbt glq patr
## gbt 9550608.6 706121.06 14978232.5
## glq 706121.1 76269.52 933915.1
## patr 14978232.5 933915.06 34408113.0
cpempresas <- prcomp(~ gbt + glq + patr,
data=empresas, scale = F)
attributes(cpempresas)
## $names
## [1] "sdev" "rotation" "center" "scale" "x" "call"
##
## $class
## [1] "prcomp"
plot(cpempresas) # ou
screeplot(cpempresas,type=c("lines"))
plot(cumsum(cpempresas$sdev**2)/sum(cpempresas$sdev**2),
type="b", ylim = c(0.8,1),
xlab="CPs", ylab="Proporção da Variância")
summary(cpempresas)
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 6440.0615 1.594e+03 145.23266
## Proportion of Variance 0.9418 5.767e-02 0.00048
## Cumulative Proportion 0.9418 9.995e-01 1.00000
cpempresas
## Standard deviations (1, .., p=3):
## [1] 6440.0615 1593.5831 145.2327
##
## Rotation (n x k) = (3 x 3):
## PC1 PC2 PC3
## gbt -0.42509725 -0.89970680 -0.09909593
## glq -0.02766083 -0.09651661 0.99494694
## patr -0.90472493 0.42569029 0.01614231
biplot(cpempresas,col=c("black","red"),
xlabs=empresas$emp,
cex=c(1,1.2))
cpempresas$x[,1]
## 1 2 3 4 5 6
## -8857.5943 -8079.3608 -11257.9265 690.7986 -3844.0909 5915.4162
## 7 8 9 10 11 12
## 5504.9703 3796.3802 7729.1502 3848.1751 3989.1624 564.9194
# Verificando a ordem dos Países em relação à 1ª componente principal
empresas$emp[order(cpempresas$x[,1],decreasing = T)[empresas$emp]]
## [1] E9 E10 E8 E4 E12 E5 E2 E1 E3 E6 E7 E11
## Levels: E1 E10 E11 E12 E2 E3 E4 E5 E6 E7 E8 E9
setwd("C:/Users/ricardo.ramos/OneDrive - Ministerio do Desenvolvimento da Industria e Comercio Exterior/UNB/Multivariada/R-Multivariada")
coxinhas <- read.delim("MingotiTB3.5.tsv",header=T)
coxinhas <- tbl_df(coxinhas)
filter_coxinhas<-coxinhas %>% select(sabor, aroma,massa, recheio)
# Matriz de covariância
cov(filter_coxinhas)
## sabor aroma massa recheio
## sabor 0.4069071 0.1822107 0.3573571 0.5504750
## aroma 0.1822107 0.1104411 0.1796357 0.2713804
## massa 0.3573571 0.1796357 0.4242000 0.5897357
## recheio 0.5504750 0.2713804 0.5897357 0.9105696
cpcoxinhas <- prcomp(~ sabor + aroma + massa +recheio,
data=coxinhas, scale = F)
attributes(cpcoxinhas)
## $names
## [1] "sdev" "rotation" "center" "scale" "x" "call"
##
## $class
## [1] "prcomp"
plot(cpcoxinhas) # ou
screeplot(cpcoxinhas,type=c("lines"))
plot(cumsum(cpcoxinhas$sdev**2)/sum(cpcoxinhas$sdev**2),
type="b", ylim = c(0.8,1),
xlab="CPs", ylab="Proporção da Variância")
summary(cpcoxinhas)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.3179 0.25473 0.16707 0.14992
## Proportion of Variance 0.9378 0.03503 0.01507 0.01214
## Cumulative Proportion 0.9378 0.97279 0.98786 1.00000
cpcoxinhas
## Standard deviations (1, .., p=4):
## [1] 1.3178933 0.2547255 0.1670736 0.1499212
##
## Rotation (n x k) = (4 x 4):
## PC1 PC2 PC3 PC4
## sabor 0.4557692 0.8160940 -0.1121458 -0.33717690
## aroma 0.2234675 0.2149422 -0.2691445 0.91182420
## massa 0.4770120 -0.4564207 -0.7177864 -0.22118397
## recheio 0.7174930 -0.2819051 0.6322715 0.07724004
biplot(cpcoxinhas,col=c("black","red"),
xlabs=coxinhas$marca,
cex=c(1,1.2))
cpcoxinhas$x[,1]
## 1 2 3 4 5 6
## -1.72821832 -0.25202070 -0.87331991 1.60440053 1.29635636 -1.42969189
## 7 8
## 1.42970986 -0.04721593
# Verificando a ordem dos Países em relação à 1ª componente principal
coxinhas$marca[order(cpcoxinhas$x[,1],decreasing = T)[coxinhas$marca]]
## [1] M4 M7 M5 M8 M2 M3 M6 M1
## Levels: M1 M2 M3 M4 M5 M6 M7 M8
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.