library(pacman)
p_load("prettydoc","ISLR", "ggplot2", "xfun","DT","cluster","plotly","tm")
## Installing package into 'C:/Users/germa/OneDrive/Documentos/R/win-library/4.1'
## (as 'lib' is unspecified)
## also installing the dependency 'data.table'
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1:
## no fue posible abrir la URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1/PACKAGES'
## package 'data.table' successfully unpacked and MD5 sums checked
## package 'plotly' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\germa\AppData\Local\Temp\RtmpQlIBDq\downloaded_packages
##
## plotly installed
## Warning: package 'plotly' was built under R version 4.1.3
## Installing package into 'C:/Users/germa/OneDrive/Documentos/R/win-library/4.1'
## (as 'lib' is unspecified)
## also installing the dependencies 'NLP', 'slam', 'xml2', 'BH'
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1:
## no fue posible abrir la URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.1/PACKAGES'
## package 'NLP' successfully unpacked and MD5 sums checked
## package 'slam' successfully unpacked and MD5 sums checked
## package 'xml2' successfully unpacked and MD5 sums checked
## package 'BH' successfully unpacked and MD5 sums checked
## package 'tm' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\germa\AppData\Local\Temp\RtmpQlIBDq\downloaded_packages
##
## tm installed
## Warning: package 'tm' was built under R version 4.1.3
Combinando Métodos - Matrimonio
Tomando en cuenta el análisis anterior, nos pudimos percatar de que no es posible determinar si un individuo tiene seguro médico únicamente tomando en cuenta su salario y su edad. Ahora en este nuevo análisis vamos a ver si con tu salario y tu edad se puede predecir tu estado civil, es decir, nuestra hipótesis alternativa sería que si se puede predecir tu estado civil únicamente tomando en cuenta tu sueldo.
Importar Datos
data("Wage")
wg <- Wage
datos <- data.frame(wage = Wage$wage, age= Wage$age)
datos2 <- data.frame(wage = Wage$wage,age=Wage$age , maritl= Wage$maritl)
datatable(datos2)
Contando los datos
table (wg$maritl)
##
## 1. Never Married 2. Married 3. Widowed 4. Divorced
## 648 2074 19 204
## 5. Separated
## 55
Graficando Variables
estadoCivil<-Wage$maritl
p<-ggplot(datos, aes(wage,age)) + geom_point(aes (col= estadoCivil), size=3)
ggplotly(p)
Analizando la gráfica podemos percatarnos de que la mayoría de las personas que cuentan con mejor salario tienden a estar casados, pero tambien podemos ver que no solo los que tienen mayor ingreso estan casados, están muy mezclados en todos los rangos de ingresos.
Clusters K-Means
Construyendo clusters utilizando K-means
wgsCluster <- kmeans(datos,centers=5, nstart = 20)
wgsCluster
## K-means clustering with 5 clusters of sizes 80, 546, 916, 446, 1012
##
## Cluster means:
## wage age
## 1 276.90658 46.88750
## 2 65.66964 34.97619
## 3 121.46355 44.44323
## 4 160.63692 45.16368
## 5 93.08103 43.02668
##
## Clustering vector:
## [1] 2 2 3 4 2 3 4 3 3 3 3 5 5 3 3 5 5 4 3 5 4 2 5 4 5 1 5 2 3 2 4 5 5 3 5 5 5
## [38] 5 5 2 2 3 2 2 5 4 1 4 2 3 4 5 1 2 3 2 4 4 5 3 4 3 5 5 3 3 4 4 5 2 5 2 2 5
## [75] 5 5 1 2 3 5 3 3 2 2 5 2 5 2 3 3 3 5 2 3 2 5 5 4 4 4 3 3 3 2 3 5 5 2 3 3 3
## [112] 5 2 3 5 5 3 4 1 5 5 5 4 3 5 2 3 5 3 5 5 3 4 3 3 2 3 3 5 5 2 5 5 2 2 3 4 3
## [149] 4 2 5 3 5 5 4 3 2 3 5 2 3 3 5 2 3 3 2 5 5 3 2 2 4 4 4 5 4 5 4 3 3 3 5 3 4
## [186] 5 5 5 5 5 4 5 2 5 4 4 3 4 5 3 2 3 3 2 3 5 1 4 3 2 3 2 4 5 2 3 4 2 5 5 2 5
## [223] 5 4 2 5 5 5 3 5 5 3 3 4 2 5 1 5 5 2 2 3 2 3 3 2 2 3 5 2 5 3 2 3 2 3 5 2 4
## [260] 5 2 3 3 5 5 4 5 5 2 2 3 3 3 3 3 5 3 3 3 2 2 3 3 4 5 5 5 4 2 3 5 3 4 2 5 2
## [297] 5 2 4 5 2 2 2 2 3 4 2 2 5 5 2 3 5 5 3 3 5 2 3 2 3 3 5 2 2 5 4 3 5 5 4 3 5
## [334] 2 4 5 5 3 2 3 3 5 5 3 4 5 5 1 3 4 3 4 5 5 4 3 5 3 2 5 3 4 4 5 2 2 5 3 4 1
## [371] 5 2 3 1 3 3 5 3 2 5 3 4 4 4 3 2 5 3 2 5 5 3 5 5 3 5 3 3 5 5 3 5 5 5 3 5 3
## [408] 5 2 3 4 5 5 5 3 3 5 3 4 5 5 5 3 4 3 4 5 3 5 4 3 5 4 2 2 2 5 5 4 3 3 3 1 5
## [445] 2 5 5 4 3 4 5 4 3 5 5 1 5 3 4 4 2 5 2 3 3 5 2 3 5 4 5 5 2 5 3 5 3 2 2 4 5
## [482] 3 5 2 5 2 2 5 3 1 3 5 5 3 3 3 3 5 3 2 3 5 4 1 5 3 5 2 5 3 5 3 2 3 5 1 5 4
## [519] 5 4 5 3 3 2 5 4 4 3 2 2 2 5 3 4 5 1 5 5 5 2 2 3 4 5 4 2 5 5 5 5 5 3 3 4 5
## [556] 2 5 5 3 2 5 2 5 1 5 4 3 5 5 5 4 3 3 5 3 2 5 2 5 5 1 5 2 5 3 2 4 4 5 3 5 5
## [593] 3 5 2 2 5 5 4 2 5 3 4 3 3 3 5 3 3 5 5 2 4 5 5 2 5 5 3 4 4 2 5 5 3 2 3 4 4
## [630] 3 3 4 4 5 3 4 5 2 3 3 5 3 1 3 3 3 2 3 3 2 4 2 4 3 3 3 5 1 5 5 5 5 5 5 2 5
## [667] 4 5 3 5 4 2 4 3 2 5 5 5 5 4 3 3 3 3 2 5 4 2 4 5 5 4 5 4 2 5 2 4 4 2 3 3 1
## [704] 2 4 4 3 5 5 2 3 3 5 3 2 5 3 4 3 5 5 3 5 4 5 3 5 3 4 3 1 4 3 4 4 3 5 3 3 3
## [741] 5 3 5 5 3 3 3 5 4 5 5 4 5 3 5 5 4 4 2 3 1 3 2 5 3 2 3 4 3 3 3 3 3 3 5 2 5
## [778] 2 3 5 4 5 4 2 3 3 5 3 3 4 3 4 1 5 5 5 4 4 5 3 5 4 3 4 5 5 4 2 2 4 3 3 5 5
## [815] 2 5 5 4 5 4 2 5 3 5 5 5 3 2 3 3 1 4 4 3 2 3 2 3 3 2 5 2 5 3 3 5 2 5 2 4 5
## [852] 3 2 5 2 4 5 3 5 5 5 4 3 4 5 2 3 3 5 4 3 3 5 5 3 3 5 5 5 5 3 2 2 5 3 3 5 3
## [889] 3 3 3 5 4 3 5 5 2 5 4 3 2 1 3 5 5 5 3 4 3 5 2 4 2 3 3 5 1 5 5 2 4 5 5 2 3
## [926] 3 5 2 3 4 5 3 5 2 3 2 2 5 5 5 3 5 2 4 5 4 4 2 3 3 3 4 2 4 5 1 4 5 3 2 3 4
## [963] 3 5 5 3 5 4 4 5 2 4 2 3 5 3 4 4 5 2 5 4 4 5 5 3 5 3 3 5 2 5 3 5 5 3 5 5 5
## [1000] 3 5 5 2 5 2 5 3 3 3 5 5 5 3 3 4 1 4 3 2 5 2 3 5 3 5 3 4 5 2 4 2 5 5 5 5 5
## [1037] 4 2 5 3 2 5 2 5 5 3 2 4 3 3 3 3 3 3 3 4 2 5 5 3 3 3 3 2 3 5 5 5 3 5 5 5 3
## [1074] 5 2 3 2 4 2 5 3 2 3 5 1 2 5 3 3 3 2 3 2 3 5 3 4 2 2 3 5 3 5 3 5 5 3 2 5 5
## [1111] 3 3 1 2 5 5 5 5 3 5 5 3 2 3 3 5 3 1 5 4 5 5 5 3 5 3 5 5 4 5 3 3 5 5 5 5 3
## [1148] 5 3 5 3 2 4 5 5 5 5 5 3 3 5 4 4 4 5 2 5 3 3 5 5 5 2 5 5 3 2 4 3 4 5 5 3 2
## [1185] 3 4 2 3 5 4 2 3 4 5 5 3 2 4 4 3 4 5 3 5 4 3 3 2 3 2 3 2 3 5 4 3 5 5 5 4 4
## [1222] 5 3 3 5 3 3 5 5 1 5 5 5 4 3 5 2 3 3 3 5 2 5 3 5 5 2 3 3 2 4 4 3 3 3 5 4 2
## [1259] 5 4 2 3 5 5 2 3 5 2 3 2 5 1 3 4 3 4 3 3 3 5 5 1 1 4 5 3 3 4 4 5 5 5 5 1 3
## [1296] 5 3 2 5 5 3 3 3 4 3 4 5 4 3 1 2 3 4 5 3 5 2 3 2 2 2 3 2 2 2 1 3 2 2 5 5 4
## [1333] 5 3 5 3 4 2 5 5 3 3 4 4 3 4 5 5 2 3 3 2 4 2 5 4 3 2 5 3 5 5 5 5 3 5 5 5 2
## [1370] 3 3 1 3 2 3 3 2 4 5 3 3 5 3 3 5 4 3 4 3 5 2 3 3 2 5 5 5 3 2 4 3 2 5 5 5 2
## [1407] 3 5 3 3 4 3 3 5 2 5 3 2 4 2 4 3 4 5 3 3 2 4 5 5 5 5 5 5 5 4 3 5 5 5 5 4 3
## [1444] 5 3 2 5 5 5 5 3 4 4 5 3 2 2 5 5 2 2 2 5 2 4 3 2 3 3 5 5 3 3 2 4 3 2 5 5 3
## [1481] 4 4 2 1 3 3 3 4 2 5 2 5 3 2 5 5 3 4 2 3 5 5 5 3 3 4 4 3 2 5 5 3 3 4 3 4 3
## [1518] 2 3 3 2 2 2 5 5 5 3 5 3 4 5 3 5 2 2 5 2 5 5 2 5 3 4 2 5 2 3 2 4 3 5 5 3 4
## [1555] 5 5 5 5 5 2 3 5 2 5 2 5 5 5 3 3 5 5 5 3 5 5 3 5 4 5 4 4 2 5 1 5 4 3 5 5 3
## [1592] 3 5 3 2 5 3 3 2 3 2 4 5 5 3 5 4 4 2 5 3 3 2 5 5 3 3 3 5 2 3 5 5 3 2 2 5 5
## [1629] 3 5 2 3 3 5 3 1 4 4 3 5 2 3 4 2 5 5 2 2 3 5 5 3 3 5 4 5 3 5 5 5 3 3 3 2 5
## [1666] 2 5 4 5 4 2 2 2 3 3 2 3 3 3 2 3 3 3 2 2 3 1 5 3 3 5 3 5 4 3 5 3 4 5 5 5 4
## [1703] 5 4 4 5 3 5 3 5 3 4 4 2 4 5 3 3 5 5 3 5 3 4 2 2 5 2 5 5 3 2 5 3 4 5 5 5 5
## [1740] 5 5 4 2 4 3 3 3 4 5 3 2 2 4 1 4 4 2 2 5 3 5 3 5 3 5 5 5 2 3 2 5 2 3 5 4 2
## [1777] 4 3 2 3 4 2 5 5 5 2 3 2 3 3 2 5 5 2 4 3 5 5 2 3 3 5 3 5 3 5 4 3 2 3 5 2 3
## [1814] 2 3 5 5 3 5 5 5 3 4 3 3 3 3 4 1 2 5 3 2 1 4 5 3 5 5 5 5 3 3 3 2 5 5 3 5 5
## [1851] 5 5 5 5 5 5 5 3 3 3 3 5 5 4 3 5 2 3 5 3 4 3 3 4 3 2 5 3 2 2 4 3 1 5 4 2 5
## [1888] 5 3 3 3 4 2 3 5 4 4 5 5 2 5 3 2 4 5 3 2 5 5 3 5 5 5 4 5 3 2 5 5 2 3 5 5 4
## [1925] 5 5 5 2 3 4 3 3 2 3 4 2 5 5 5 5 3 2 2 3 3 4 4 5 3 3 5 5 5 4 3 5 4 5 3 5 3
## [1962] 2 3 3 5 5 2 5 3 4 4 2 3 5 3 5 3 4 3 3 3 5 5 5 1 2 5 2 5 4 5 3 5 3 3 3 5 4
## [1999] 3 4 5 5 5 4 2 3 3 5 5 3 2 3 5 5 5 3 4 5 4 1 3 5 5 5 2 5 1 5 5 3 5 4 5 2 2
## [2036] 5 4 3 2 3 3 3 5 5 5 2 3 2 5 5 5 3 4 4 5 3 2 5 4 2 3 2 4 5 3 5 5 5 3 4 5 5
## [2073] 2 2 5 3 3 5 3 5 2 2 5 5 4 3 4 5 2 3 2 3 2 5 2 2 3 2 5 2 5 4 4 5 3 3 5 4 4
## [2110] 3 1 5 5 3 4 3 1 3 3 5 3 3 3 3 3 3 5 3 5 5 5 5 4 5 3 3 5 3 5 3 5 3 4 5 5 3
## [2147] 4 3 1 3 5 4 3 3 3 4 5 4 3 3 2 3 2 5 3 5 5 5 2 2 2 4 3 4 4 5 5 5 2 3 5 4 4
## [2184] 3 3 2 2 4 3 5 4 2 3 4 5 2 5 5 5 5 2 2 3 5 3 3 5 3 3 5 2 3 3 3 5 3 2 5 3 5
## [2221] 2 3 5 4 5 2 4 4 4 5 2 5 2 5 5 3 5 5 5 2 2 2 5 5 3 4 3 2 2 5 2 3 5 2 1 5 3
## [2258] 4 5 5 3 4 5 5 1 5 5 3 5 4 5 3 1 3 3 3 2 3 3 5 4 4 5 4 5 5 1 5 4 3 4 3 5 5
## [2295] 5 2 3 2 2 4 5 2 3 4 3 3 5 3 2 4 4 4 2 5 4 2 3 5 3 4 3 5 5 5 4 5 5 2 3 2 5
## [2332] 4 4 5 4 5 3 2 3 4 3 3 2 2 2 5 5 5 4 5 3 1 4 5 3 3 3 4 5 2 3 4 5 3 5 3 2 5
## [2369] 1 1 5 4 3 5 2 4 4 2 3 3 5 1 4 5 4 3 3 5 3 3 3 3 5 3 2 5 1 2 3 5 5 3 5 4 2
## [2406] 2 3 2 3 2 3 2 2 5 5 5 4 3 3 4 3 5 3 2 2 4 3 3 5 2 5 2 3 3 3 3 3 2 4 2 5 5
## [2443] 2 3 4 3 3 5 5 4 4 4 2 2 2 5 5 3 5 2 3 3 3 5 5 3 4 2 2 3 3 1 4 3 5 4 3 1 3
## [2480] 2 3 3 5 5 2 3 2 3 3 2 3 5 3 3 2 5 5 3 5 5 4 2 5 2 5 3 4 5 3 2 2 2 3 3 2 3
## [2517] 5 5 3 3 4 3 2 1 5 5 3 5 3 2 3 1 1 5 3 4 5 2 5 5 3 3 2 3 3 5 4 3 5 5 5 5 3
## [2554] 2 3 2 4 4 2 3 5 4 4 3 3 3 3 5 3 2 5 2 4 3 5 5 5 2 2 5 4 3 3 3 5 2 2 3 3 3
## [2591] 3 3 3 5 5 3 5 3 3 3 5 2 4 4 4 5 4 4 3 2 4 5 2 5 5 3 2 3 5 5 4 5 5 3 4 5 2
## [2628] 2 5 3 3 2 4 5 3 2 2 3 5 2 4 3 3 5 4 5 3 4 5 3 5 2 3 2 5 5 4 2 2 2 3 2 3 4
## [2665] 2 3 3 3 3 4 2 5 2 5 2 3 2 5 3 3 1 2 3 5 2 1 4 3 5 1 3 3 1 2 1 3 4 3 5 2 3
## [2702] 3 4 5 4 1 5 5 2 3 3 2 1 5 2 4 2 3 5 4 2 3 4 4 3 3 4 3 3 2 5 2 3 3 5 5 3 2
## [2739] 5 3 5 3 3 5 5 5 5 3 4 5 5 2 4 4 5 5 3 2 3 3 2 2 3 2 5 3 5 3 4 5 2 3 3 3 4
## [2776] 5 4 2 2 4 5 5 4 5 4 5 3 5 3 4 2 2 5 3 4 3 1 5 5 3 3 3 2 4 4 3 2 5 4 5 3 3
## [2813] 3 5 4 3 2 5 3 3 3 3 5 3 2 2 4 3 2 4 4 3 3 4 1 5 3 3 2 5 4 3 4 2 2 3 4 3 5
## [2850] 3 5 5 2 3 3 5 4 4 4 5 2 5 2 4 2 5 5 3 5 5 5 3 5 3 2 5 3 3 3 3 1 2 3 2 2 2
## [2887] 5 2 3 4 5 5 5 1 2 5 2 2 5 4 5 4 5 3 5 2 2 5 2 4 3 3 2 5 3 5 3 3 2 5 3 3 5
## [2924] 2 3 1 3 5 4 5 3 5 3 5 5 4 5 4 2 5 3 5 2 2 5 5 5 3 3 2 3 4 5 3 2 5 5 5 5 3
## [2961] 2 5 5 2 2 5 5 5 5 2 4 3 5 4 3 5 2 5 5 4 2 2 3 5 5 5 3 4 3 4 3 3 3 5 3 4 5
## [2998] 2 5 5
##
## Within cluster sum of squares by cluster:
## [1] 20954.07 169284.23 173404.41 159978.87 212127.25
## (between_SS / total_SS = 86.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
- Se comparan los clusters con los datos originales
table(wgsCluster$cluster,wg$maritl)
##
## 1. Never Married 2. Married 3. Widowed 4. Divorced 5. Separated
## 1 5 72 0 2 1
## 2 253 239 3 39 12
## 3 124 717 7 56 12
## 4 35 381 1 25 4
## 5 231 665 8 82 26
- Se agrupan los clusters
clusplot(datos, wgsCluster$cluster, color=T, shade=T, lines=0)
Podemos percatarnos de que en la parte final es más fácil distinguir los resultados, pero en la parte inicial y media no hay una preferencia alguna entre los diferentes estados civiles.diferencia entre los salarios altos y bajos.
tot.withinss <- vector(mode="character", length=10)
for (i in 1:10){
wgsCluster <- kmeans(datos, center=i, nstart=20)
tot.withinss[i] <- wgsCluster$tot.withinss
}
- Se grafíca el codo obtenido
plot(1:10, tot.withinss, type="b", pch=19)
Conclusión
Podemos concluir que como tal no existe una relación estrecha para definir el estado civil de una persona meramente con su ingreso. Es verdad que las personas con mas ingresos tienden a estar casadas, pero no es un factor determinante para poder predecirlo.
Descargas
Descarga este código haciendo click al siguiente enlace.
xfun::embed_file("A6U1_2Equipo2.Rmd")