library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.4.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Cargando paquete requerido: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(readxl)
# Read the dataset from the Excel workbook into `Datos1`.
# NOTE(review): the absolute path is machine-specific; confirm it or switch to
# a project-relative path before sharing this script.
Datos1 <- read_excel("C:/Users/User/Downloads/Datos1.xlsx")

# View() opens a GUI data viewer, so only call it in an interactive session;
# this keeps the script runnable in batch mode (Rscript, knitting).
if (interactive()) {
  View(Datos1)
}


# Build the table used for the PCA: every column of `Datos1` except "id".
# A logical mask is used instead of `-which(...)`: when no column is named
# "id", which() returns integer(0) and a negated empty index would select
# zero columns instead of leaving the table unchanged.
Datos1_pca <- Datos1[, names(Datos1) != "id", drop = FALSE]

# Store `diagnosis` as a factor so it can be passed to PCA() as a
# qualitative supplementary variable.
Datos1_pca$diagnosis <- as.factor(Datos1_pca$diagnosis)


# Locate the `diagnosis` column by name rather than assuming it is the first
# column, so the analysis does not depend on the column order of the input.
idx_diagnosis <- match("diagnosis", names(Datos1_pca))
stopifnot(!is.na(idx_diagnosis))

# Fit the PCA with `diagnosis` as a qualitative supplementary variable (it is
# not used to build the components); graph = FALSE suppresses the default
# plots.
res.pca <- PCA(Datos1_pca, quali.sup = idx_diagnosis, graph = FALSE)

# Print eigenvalues plus the individual, variable and supplementary-category
# tables.
summary(res.pca)
## 
## Call:
## PCA(X = Datos1_pca, quali.sup = 1, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              13.282   5.691   2.818   1.981   1.649   1.207   0.675
## % of var.             44.272  18.971   9.393   6.602   5.496   4.025   2.251
## Cumulative % of var.  44.272  63.243  72.636  79.239  84.734  88.759  91.010
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.477   0.417   0.351   0.294   0.261   0.241   0.157
## % of var.              1.589   1.390   1.169   0.980   0.871   0.805   0.523
## Cumulative % of var.  92.598  93.988  95.157  96.137  97.007  97.812  98.335
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.094   0.080   0.059   0.053   0.049   0.031   0.030
## % of var.              0.314   0.266   0.198   0.175   0.165   0.104   0.100
## Cumulative % of var.  98.649  98.915  99.113  99.288  99.453  99.557  99.657
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26  Dim.27  Dim.28
## Variance               0.027   0.024   0.018   0.015   0.008   0.007   0.002
## % of var.              0.091   0.081   0.060   0.052   0.027   0.023   0.005
## Cumulative % of var.  99.749  99.830  99.890  99.942  99.969  99.992  99.997
##                       Dim.29  Dim.30
## Variance               0.001   0.000
## % of var.              0.002   0.000
## Cumulative % of var. 100.000 100.000
## 
## Individuals (the 10 first)
##                             Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2
## 1                       | 10.710 |  9.193  1.118  0.737 |  1.949  0.117  0.033
## 2                       |  5.132 |  2.388  0.075  0.216 | -3.768  0.438  0.539
## 3                       |  6.119 |  5.734  0.435  0.878 | -1.075  0.036  0.031
## 4                       | 13.986 |  7.123  0.671  0.259 | 10.276  3.261  0.540
## 5                       |  5.868 |  3.935  0.205  0.450 | -1.948  0.117  0.110
## 6                       |  5.735 |  2.380  0.075  0.172 |  3.950  0.482  0.474
## 7                       |  3.970 |  2.239  0.066  0.318 | -2.690  0.223  0.459
## 8                       |  4.195 |  2.143  0.061  0.261 |  2.340  0.169  0.311
## 9                       |  6.017 |  3.175  0.133  0.278 |  3.392  0.355  0.318
## 10                      | 12.163 |  6.352  0.534  0.273 |  7.727  1.844  0.404
##                            Dim.3    ctr   cos2  
## 1                       | -1.123  0.079  0.011 |
## 2                       | -0.529  0.017  0.011 |
## 3                       | -0.552  0.019  0.008 |
## 4                       | -3.233  0.652  0.053 |
## 5                       |  1.390  0.120  0.056 |
## 6                       | -2.935  0.537  0.262 |
## 7                       | -1.640  0.168  0.171 |
## 8                       | -0.872  0.047  0.043 |
## 9                       | -3.120  0.607  0.269 |
## 10                      | -4.342  1.176  0.127 |
## 
## Variables (the 10 first)
##                            Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## radius_mean             |  0.798  4.792  0.636 | -0.558  5.469  0.311 | -0.014
## texture_mean            |  0.378  1.076  0.143 | -0.142  0.356  0.020 |  0.108
## perimeter_mean          |  0.829  5.177  0.688 | -0.513  4.630  0.264 | -0.016
## area_mean               |  0.805  4.884  0.649 | -0.551  5.340  0.304 |  0.048
## smoothness_mean         |  0.520  2.033  0.270 |  0.444  3.464  0.197 | -0.175
## compactness_mean        |  0.872  5.726  0.760 |  0.362  2.307  0.131 | -0.124
## concavity_mean          |  0.942  6.677  0.887 |  0.144  0.362  0.021 |  0.005
## concave points_mean     |  0.951  6.804  0.904 | -0.083  0.121  0.007 | -0.043
## symmetry_mean           |  0.504  1.909  0.254 |  0.454  3.623  0.206 | -0.068
## fractal_dimension_mean  |  0.235  0.414  0.055 |  0.875 13.438  0.765 | -0.038
##                            ctr   cos2  
## radius_mean              0.007  0.000 |
## texture_mean             0.417  0.012 |
## perimeter_mean           0.009  0.000 |
## area_mean                0.082  0.002 |
## smoothness_mean          1.088  0.031 |
## compactness_mean         0.549  0.015 |
## concavity_mean           0.001  0.000 |
## concave points_mean      0.065  0.002 |
## symmetry_mean            0.162  0.005 |
## fractal_dimension_mean   0.051  0.001 |
## 
## Supplementary categories
##                              Dist     Dim.1    cos2  v.test     Dim.2    cos2
## B                       |   2.251 |  -2.206   0.960 -18.720 |   0.346   0.024
## M                       |   3.791 |   3.715   0.960  18.720 |  -0.583   0.024
##                          v.test     Dim.3    cos2  v.test  
## B                         4.489 |   0.213   0.009   3.927 |
## M                        -4.489 |  -0.359   0.009  -3.927 |
# Individuals plot, coloured by diagnosis group with an ellipse per group.
# The grouping variable is referenced by name instead of by column position
# so the plot does not depend on the column order of the PCA input table.
fviz_pca_ind(res.pca,
             habillage = "diagnosis",
             addEllipses = TRUE,
             palette = c("#00AFBB", "#FC4E07"),
             repel = TRUE,
             title = "Individuos según diagnóstico")

# Variables plot, with each variable coloured by its cos2 value on a
# three-colour gradient.
fviz_pca_var(res.pca,
             col.var = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             title = "Variables activas en el PCA")

# Scree plot with value labels on the bars; the y axis is fixed to 0-50.
fviz_screeplot(res.pca, addlabels = TRUE, ylim = c(0, 50))

# Take the individuals' coordinates on the first dimension of the PCA result
# and attach them to the working table as column `PC1`.
coord_individuos <- res.pca[["ind"]][["coord"]]
pc1 <- coord_individuos[, 1]
Datos1_pca[["PC1"]] <- pc1

# Descripción: La variable diagnosis se trató como variable ilustrativa porque no participa en el cálculo del PCA, pero permite interpretar los componentes. Se conservaron las 30 variables numéricas que describen propiedades morfológicas de los tumores.

# Se aplicó PCA indicando que diagnosis es una variable cualitativa ilustrativa. El PCA transforma las variables originales en nuevos componentes no correlacionados que explican la variabilidad total.