Análisis de Componentes Principales

library(readr)
library(dplyr)

Los datos

# Show the current working directory; the relative path used below is
# resolved against it.
getwd()
## [1] "C:/Users/Usuario/Documents/Ciencia de los datos/Diplomado en CD 2019/proyectos Machine Learning/FundamentosMachineLearning/markdown"
# Load the USArrests data set from CSV, keeping text columns as character
# vectors rather than factors.
# `na.strings` expects a character vector of tokens to treat as missing. The
# previous value `TRUE` is not such a token list: it is effectively read as the
# string "TRUE" and replaces the default "NA" handling, so it is set
# explicitly to "NA" here.
datos <- read.csv(
  "../datos/USArrests.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = "NA"
)
# Preview the first rows to confirm the file was parsed as expected.
head(datos)
##            X Murder Assault UrbanPop Rape
## 1    Alabama   13.2     236       58 21.2
## 2     Alaska   10.0     263       48 44.5
## 3    Arizona    8.1     294       80 31.0
## 4   Arkansas    8.8     190       50 19.5
## 5 California    9.0     276       91 40.6
## 6   Colorado    7.9     204       78 38.7
# Preview the last six rows as well, to check the end of the file.
tail(datos, n = 6L)
##                X Murder Assault UrbanPop Rape
## 45       Vermont    2.2      48       32 11.2
## 46      Virginia    8.5     156       63 20.7
## 47    Washington    4.0     145       73 26.2
## 48 West Virginia    5.7      81       39  9.3
## 49     Wisconsin    2.6      53       66 10.8
## 50       Wyoming    6.8     161       60 15.6

Cambiar el nombre de los registros

# Label every row with its state name, taken from column X.
rownames(datos) <- datos[["X"]]

# Keep only the four numeric measurement columns; this drops X, whose values
# now live in the row names.
datos <- datos %>% select(Murder, Assault, UrbanPop, Rape)

# Confirm the new layout: state names as row labels, four numeric columns.
head(datos, n = 6L)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7

Calcular la variabilidad para cada columna

# Sample variance of each column, returned as a named numeric vector.
# Iterating over the columns directly avoids converting the data frame to a
# matrix first.
variabilidad <- vapply(datos, var, FUN.VALUE = numeric(1))
variabilidad
##     Murder    Assault   UrbanPop       Rape 
##   18.97047 6945.16571  209.51878   87.72916

Determinar ACP

# Principal component analysis of the four variables.
# The variables are centred and scaled to unit variance before the
# decomposition; their raw variances differ by orders of magnitude (Assault is
# far larger than the rest), so unscaled PCA would be dominated by Assault.
# prcomp()'s scaling argument is named `scale.` (with a trailing dot); it is
# spelled out in full instead of relying on partial argument matching of
# `scale`.
acp <- prcomp(datos, center = TRUE, scale. = TRUE)
# Print the component standard deviations and the loadings (rotation matrix).
print(acp)
## Standard deviations (1, .., p=4):
## [1] 1.5748783 0.9948694 0.5971291 0.4164494
## 
## Rotation (n x k) = (4 x 4):
##                 PC1        PC2        PC3         PC4
## Murder   -0.5358995  0.4181809 -0.3412327  0.64922780
## Assault  -0.5831836  0.1879856 -0.2681484 -0.74340748
## UrbanPop -0.2781909 -0.8728062 -0.3780158  0.13387773
## Rape     -0.5434321 -0.1673186  0.8177779  0.08902432

Mostrar la varianza de cada componente del ACP mediante un gráfico de líneas

# Scree plot drawn as a line: variance of each principal component.
plot(acp, type = "l")

Resumen del ACP

# Standard deviation, proportion of variance and cumulative proportion of
# variance for each principal component.
summary(object = acp)
## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion  0.6201 0.8675 0.95664 1.00000

Biplot del ACP

# Biplot of the PCA result: observations and variables on the same axes.
biplot(x = acp)

Calcular las puntuaciones solo con los dos primeros componentes principales

# Scores of every state on the first two principal components.
# prcomp() stores the scores of the centred and scaled data in `acp$x`
# (retx = TRUE is its default), so they are read from there.
# The previous `acp$rotation[, k] * datos` was not a projection: it multiplied
# the raw, unstandardised data element-wise by the loadings recycled down the
# rows, so loadings were not matched to their variables and the results were
# not principal component scores.
pc1 <- acp$x[, "PC1"]
pc2 <- acp$x[, "PC2"]

# Attach the scores to the data as two new columns.
datos$pc1 <- pc1
datos$pc2 <- pc2

# NOTE(review): the listing printed below in this rendered document was
# produced by the earlier computation; re-knit the document to refresh it.
datos
##                Murder Assault UrbanPop Rape        pc1          pc2
## Alabama          13.2     236       58 21.2 -109.70674 -194.7112752
## Alaska           10.0     263       48 44.5 -200.93002  -40.5473153
## Arizona           8.1     294       80 31.0 -198.67595   59.0145557
## Arkansas          8.8     190       50 19.5 -154.13078   29.5446483
## California        9.0     276       91 40.6 -141.66518 -234.5123542
## Colorado          7.9     204       78 38.7 -181.98644  -24.4602694
## Connecticut       3.3     110       77 11.1  -87.23615  -19.4446345
## Delaware          5.9     238       72 15.8 -190.34537   34.6766247
## Florida          15.4     335       80 31.9 -153.19304 -280.3381377
## Georgia          17.4     211       60 25.8 -173.82313  -25.0709671
## Hawaii            5.3      46       83 20.2  -60.04080  -49.3852136
## Idaho             2.6     120       54 14.2 -109.02150   15.7574333
## Illinois         10.4     249       83 24.0 -125.99912 -199.2179979
## Indiana           7.2     113       65 21.0 -114.92576   -8.8481365
## Iowa              2.2      56       57 11.3  -52.53493  -23.5265544
## Kansas            6.0     115       66 18.0 -116.69053   12.9551436
## Kentucky          9.7     109       52 16.3  -67.92231  -83.5608566
## Louisiana        15.4     249       66 22.2 -194.84993  -30.0747857
## Maine             2.1      83       51  7.8  -63.43161   -8.3751863
## Maryland         11.3     300       67 27.8 -233.71833   48.5206319
## Massachusetts     4.4     149       85 16.3  -93.89436 -106.8894943
## Michigan         12.1     255       74 35.1 -207.86176  -32.3535756
## Minnesota         2.7      72       66 14.9  -65.68138  -23.6218683
## Mississippi      16.1     259       44 17.1 -193.67727   41.8469753
## Missouri          9.0     178       70 28.2  -99.69902 -146.9363486
## Montana           6.0     109       53 16.4 -102.55422   -9.8906062
## Nebraska          4.3     102       62 16.5  -81.94814   -8.3126180
## Nevada           12.2     252       81 46.0 -224.43659   40.4256132
## New Hampshire     2.1      57       56  9.5  -49.63545  -33.7453036
## New Jersey        7.4     159       89 18.8 -152.84113  -11.6274411
## New Mexico       11.4     285       70 32.1 -192.57846   61.5587283
## New York         11.1     254       86 26.1 -216.11699   36.4081282
## North Carolina   13.0     337       45 16.1 -129.31137 -283.9333766
## North Dakota      0.8      45       44  7.3  -54.54813   -0.3290096
## Ohio              7.3     120       75 21.4  -98.67129  -12.7011753
## Oklahoma          6.6     151       68 20.0 -140.26444   19.6635681
## Oregon            4.9     159       67 29.3  -90.91451 -134.2822019
## Pennsylvania      6.3     106       72 14.9 -111.36422   -5.5095502
## Rhode Island      3.4     174       87  8.3 -122.84293   -2.6673081
## South Carolina   14.4     279       48 22.5 -209.74003   46.2369768
## South Dakota      3.8      86       45 12.8  -53.63715  -65.8260256
## Tennessee        13.2     188       59 26.9 -158.88941  -22.3842141
## Texas            12.7     201       80 25.5 -147.16953   13.8088319
## Utah              3.2     120       80 22.9 -128.55049   12.9422324
## Vermont           2.2      48       32 11.2  -34.79666  -37.3683410
## Virginia          8.5     156       63 20.7 -137.72208  -16.1242322
## Washington        4.0     145       73 26.2 -113.16669    4.3864873
## West Virginia     5.7      81       39  9.3  -76.95290    9.4959571
## Wisconsin         2.6      53       66 10.8  -54.51128  -26.9978277
## Wyoming           6.8     161       60 15.6 -134.92677  -16.9910326
# Show only the two principal component columns (everything except the four
# original variables), selected by name rather than by position.
datos[, c("pc1", "pc2")]
##                       pc1          pc2
## Alabama        -109.70674 -194.7112752
## Alaska         -200.93002  -40.5473153
## Arizona        -198.67595   59.0145557
## Arkansas       -154.13078   29.5446483
## California     -141.66518 -234.5123542
## Colorado       -181.98644  -24.4602694
## Connecticut     -87.23615  -19.4446345
## Delaware       -190.34537   34.6766247
## Florida        -153.19304 -280.3381377
## Georgia        -173.82313  -25.0709671
## Hawaii          -60.04080  -49.3852136
## Idaho          -109.02150   15.7574333
## Illinois       -125.99912 -199.2179979
## Indiana        -114.92576   -8.8481365
## Iowa            -52.53493  -23.5265544
## Kansas         -116.69053   12.9551436
## Kentucky        -67.92231  -83.5608566
## Louisiana      -194.84993  -30.0747857
## Maine           -63.43161   -8.3751863
## Maryland       -233.71833   48.5206319
## Massachusetts   -93.89436 -106.8894943
## Michigan       -207.86176  -32.3535756
## Minnesota       -65.68138  -23.6218683
## Mississippi    -193.67727   41.8469753
## Missouri        -99.69902 -146.9363486
## Montana        -102.55422   -9.8906062
## Nebraska        -81.94814   -8.3126180
## Nevada         -224.43659   40.4256132
## New Hampshire   -49.63545  -33.7453036
## New Jersey     -152.84113  -11.6274411
## New Mexico     -192.57846   61.5587283
## New York       -216.11699   36.4081282
## North Carolina -129.31137 -283.9333766
## North Dakota    -54.54813   -0.3290096
## Ohio            -98.67129  -12.7011753
## Oklahoma       -140.26444   19.6635681
## Oregon          -90.91451 -134.2822019
## Pennsylvania   -111.36422   -5.5095502
## Rhode Island   -122.84293   -2.6673081
## South Carolina -209.74003   46.2369768
## South Dakota    -53.63715  -65.8260256
## Tennessee      -158.88941  -22.3842141
## Texas          -147.16953   13.8088319
## Utah           -128.55049   12.9422324
## Vermont         -34.79666  -37.3683410
## Virginia       -137.72208  -16.1242322
## Washington     -113.16669    4.3864873
## West Virginia   -76.95290    9.4959571
## Wisconsin       -54.51128  -26.9978277
## Wyoming        -134.92677  -16.9910326