install.packages(c(“FactoMineR”, “factoextra”))
library("FactoMineR")
library("factoextra")
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("ade4")
##
## Attaching package: 'ade4'
## The following object is masked from 'package:FactoMineR':
##
## reconst
library("ExPosition")
## Loading required package: prettyGraphs
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(h2o)
##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
library(TSstudio)
library(tidyr)
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggplot2)
library(maps)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(sp)
library(stats)
library(graphics)
library(rnaturalearth)
## Support for Spatial objects (`sp`) will be deprecated in {rnaturalearth} and will be removed in a future release of the package. Please use `sf` objects with {rnaturalearth}. For example: `ne_download(returnclass = 'sf')`
library(listviewer)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(carat)
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
#SECCIÓN 5
data(poison)
head(poison[, 1:7], 3)
## Age Time Sick Sex Nausea Vomiting Abdominals
## 1 9 22 Sick_y F Nausea_y Vomit_n Abdo_y
## 2 5 0 Sick_n F Nausea_n Vomit_n Abdo_n
## 3 6 16 Sick_y F Nausea_n Vomit_y Abdo_y
poison.active <- poison[1:55, 5:15]
head(poison.active[, 1:6],3)
## Nausea Vomiting Abdominals Fever Diarrhae Potato
## 1 Nausea_y Vomit_n Abdo_y Fever_y Diarrhea_y Potato_y
## 2 Nausea_n Vomit_n Abdo_n Fever_n Diarrhea_n Potato_y
## 3 Nausea_n Vomit_y Abdo_y Fever_y Diarrhea_y Potato_y
summary(poison.active)[, 1:4]
## Nausea Vomiting Abdominals Fever
## Nausea_n:43 Vomit_n:33 Abdo_n:18 Fever_n:20
## Nausea_y:12 Vomit_y:22 Abdo_y:37 Fever_y:35
for (i in 1:4) {
plot(poison.active[,i], main=colnames(poison.active)[i],
ylab = "Count", col= "steelblue", las = 2)
}
MCA(poison.active, ncp = 5, graph = TRUE)
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 55 individuals, described by 11 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. of the categories"
## 4 "$var$cos2" "cos2 for the categories"
## 5 "$var$contrib" "contributions of the categories"
## 6 "$var$v.test" "v-test for the categories"
## 7 "$var$eta2" "coord. of variables"
## 8 "$ind" "results for the individuals"
## 9 "$ind$coord" "coord. for the individuals"
## 10 "$ind$cos2" "cos2 for the individuals"
## 11 "$ind$contrib" "contributions of the individuals"
## 12 "$call" "intermediate results"
## 13 "$call$marge.col" "weights of columns"
## 14 "$call$marge.li" "weights of rows"
res.mca <- MCA(poison.active, graph = FALSE)
print (res.mca)
## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 55 individuals, described by 11 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. of the categories"
## 4 "$var$cos2" "cos2 for the categories"
## 5 "$var$contrib" "contributions of the categories"
## 6 "$var$v.test" "v-test for the categories"
## 7 "$var$eta2" "coord. of variables"
## 8 "$ind" "results for the individuals"
## 9 "$ind$coord" "coord. for the individuals"
## 10 "$ind$cos2" "cos2 for the individuals"
## 11 "$ind$contrib" "contributions of the individuals"
## 12 "$call" "intermediate results"
## 13 "$call$marge.col" "weights of columns"
## 14 "$call$marge.li" "weights of rows"
eig.val <- get_eigenvalue(res.mca)
# head(eig.val)
fviz_screeplot(res.mca, addlabels = TRUE, ylim = c(0,45))
fviz_mca_biplot(res.mca,
repel = TRUE, #Avoid text overlapping (slow if many point
ggtheme = theme_minimal())
var <- get_mca_var(res.mca)
var
## Multiple Correspondence Analysis Results for variables
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for categories"
## 2 "$cos2" "Cos2 for categories"
## 3 "$contrib" "contributions of categories"
# Coordinates
head(var$coord)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 0.2673909 0.12139029 -0.265583253 0.03376130 0.07370500
## Nausea_y -0.9581506 -0.43498187 0.951673323 -0.12097801 -0.26410958
## Vomit_n 0.4790279 -0.40919465 0.084492799 0.27361142 0.05245250
## Vomit_y -0.7185419 0.61379197 -0.126739198 -0.41041713 -0.07867876
## Abdo_n 1.3180221 -0.03574501 -0.005094243 -0.15360951 -0.06986987
## Abdo_y -0.6411999 0.01738946 0.002478280 0.07472895 0.03399075
# Cos2: quality on the factore map
head(var$cos2)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 0.2562007 0.0528025759 2.527485e-01 0.004084375 0.019466197
## Nausea_y 0.2562007 0.0528025759 2.527485e-01 0.004084375 0.019466197
## Vomit_n 0.3442016 0.2511603912 1.070855e-02 0.112294813 0.004126898
## Vomit_y 0.3442016 0.2511603912 1.070855e-02 0.112294813 0.004126898
## Abdo_n 0.8451157 0.0006215864 1.262496e-05 0.011479077 0.002374929
## Abdo_y 0.8451157 0.0006215864 1.262496e-05 0.011479077 0.002374929
#Contributions to the principal components
head(var$contrib)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 1.515869 0.81100008 4.670018e+00 0.08449397 0.48977906
## Nausea_y 5.431862 2.90608363 1.673423e+01 0.30277007 1.75504164
## Vomit_n 3.733667 7.07226253 3.627455e-01 4.25893721 0.19036376
## Vomit_y 5.600500 10.60839380 5.441183e-01 6.38840581 0.28554563
## Abdo_n 15.417637 0.02943661 7.192511e-04 0.73219636 0.18424268
## Abdo_y 7.500472 0.01432051 3.499060e-04 0.35620363 0.08963157
fviz_mca_var(res.mca, choice = "mca.cor",
repel = TRUE, #Avoid text overlapping (slow)
ggtheme = theme_minimal())
head(round(var$coord, 2), 4)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 0.27 0.12 -0.27 0.03 0.07
## Nausea_y -0.96 -0.43 0.95 -0.12 -0.26
## Vomit_n 0.48 -0.41 0.08 0.27 0.05
## Vomit_y -0.72 0.61 -0.13 -0.41 -0.08
fviz_mca_var(res.mca,
repel = TRUE, #Avoid text overlapping (slow)
ggtheme = theme_minimal())
fviz_mca_var(res.mca, col.var="black", shape.var =15,
repel = TRUE)
head(var$cos2, 4)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 0.2562007 0.05280258 0.25274850 0.004084375 0.019466197
## Nausea_y 0.2562007 0.05280258 0.25274850 0.004084375 0.019466197
## Vomit_n 0.3442016 0.25116039 0.01070855 0.112294813 0.004126898
## Vomit_y 0.3442016 0.25116039 0.01070855 0.112294813 0.004126898
#Color by cos2 values: quality on the factor map
fviz_mca_var(res.mca, col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping
ggtheme = theme_minimal())
# Change the transparency by cos2 values
fviz_mca_var(res.mca, alpha.var="cos2",
repel = TRUE,
ggtheme = theme_minimal())
library("corrplot")
## corrplot 0.92 loaded
corrplot(var$cos2, is.corr=FALSE)
# Cos2 of variable categories on Dim.1 and Dim.2
fviz_cos2(res.mca, choice = "var", axes = 1:2)
head(round(var$contrib,2), 4)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Nausea_n 1.52 0.81 4.67 0.08 0.49
## Nausea_y 5.43 2.91 16.73 0.30 1.76
## Vomit_n 3.73 7.07 0.36 4.26 0.19
## Vomit_y 5.60 10.61 0.54 6.39 0.29
# Contributions of rows to dimension 1
fviz_contrib(res.mca, choice = "var", axes = 1, top = 15)
# Contributions of rows to dimension 2
fviz_contrib(res.mca, choice = "var", axes = 2, top = 15)
# Total contribution to dimension 1 and 2
fviz_contrib(res.mca, choice = "var", axes = 1:2, top = 15)
fviz_mca_var(res.mca, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # avoid text overlapping (slow)
ggtheme = theme_minimal()
)
# Change the transparency by contrib values
fviz_mca_var(res.mca, alpha.var="contrib",
repel = TRUE,
ggtheme = theme_minimal())
ind <- get_mca_ind(res.mca)
ind
## Multiple Correspondence Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Coordinates of colum points
head(ind$coord)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 1 -0.4525811 -0.26415072 0.17151614 0.01369348 -0.11696806
## 2 0.8361700 -0.03193457 -0.07208249 -0.08550351 0.51978710
## 3 -0.4481892 0.13538726 -0.22484048 -0.14170168 -0.05004753
## 4 0.8803694 -0.08536230 -0.02052044 -0.07275873 -0.22935022
## 5 -0.4481892 0.13538726 -0.22484048 -0.14170168 -0.05004753
## 6 -0.3594324 -0.43604390 -1.20932223 1.72464616 0.04348157
# Quality of representation
head(ind$cos2)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 1 0.34652591 0.1180447167 0.0497683175 0.0003172275 0.0231460846
## 2 0.55589562 0.0008108236 0.0041310808 0.0058126211 0.2148103098
## 3 0.54813888 0.0500176790 0.1379484860 0.0547920948 0.0068349171
## 4 0.74773962 0.0070299584 0.0004062504 0.0051072923 0.0507479873
## 5 0.54813888 0.0500176790 0.1379484860 0.0547920948 0.0068349171
## 6 0.02485357 0.0365775483 0.2813443706 0.5722083217 0.0003637178
# Contributions
head(ind$contrib)
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 1 1.110927 0.98238297 0.498254685 0.003555817 0.31554778
## 2 3.792117 0.01435818 0.088003703 0.138637089 6.23134138
## 3 1.089470 0.25806722 0.856229950 0.380768961 0.05776914
## 4 4.203611 0.10259105 0.007132055 0.100387990 1.21319013
## 5 1.089470 0.25806722 0.856229950 0.380768961 0.05776914
## 6 0.700692 2.67693398 24.769968729 56.404214518 0.04360547
fviz_mca_ind(res.mca, col.ind = "cos2",
gradients.col = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE, # Avoid text overlapping (slow if many points
ggtheme = theme_minimal())
# Cos2 of individuals
fviz_cos2(res.mca, choice = "ind", axes = 1:2, top = 20)
# Contribution of individuals to the dimensions
fviz_contrib(res.mca, choice= "ind", axes = 1:2, top = 20)
fviz_mca_ind(res.mca,
label = "none", # hide individual labels
habillage = "Vomiting", # color by groups
pallete = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, ellipse.type = "confidence",
ggtheme = theme_minimal())
# habillage = index of the colum to be used as grouping variable
fviz_mca_ind(res.mca, habillage = 2, addEllipses = TRUE)
# habillage = external grouping variable
fviz_mca_ind(res.mca, habillage = poison$Vomiting, addEllipses = TRUE)
fviz_ellipses(res.mca, c("Vomiting", "Fever"),
geom = "point")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fviz_ellipses(res.mca, 1:4, geom = "point")
res.desc <- dimdesc(res.mca, axes = c(1,2))
# Description of dimension 1
res.desc[[1]]
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Abdominals 0.8451157 4.055640e-23
## Diarrhae 0.7994680 3.910776e-20
## Fever 0.7846788 2.600566e-19
## Mayo 0.3829749 4.756234e-07
## Vomiting 0.3442016 2.510738e-06
## Nausea 0.2562007 8.062777e-05
## Cheese 0.1944181 7.534834e-04
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Abdominals=Abdo_n 0.5671866 4.055640e-23
## Diarrhae=Diarrhea_n 0.5380920 3.910776e-20
## Fever=Fever_n 0.5330918 2.600566e-19
## Mayo=Mayo_n 0.4644981 4.756234e-07
## Vomiting=Vomit_n 0.3466915 2.510738e-06
## Nausea=Nausea_n 0.3547892 8.062777e-05
## Cheese=Cheese_n 0.3830043 7.534834e-04
## Cheese=Cheese_y -0.3830043 7.534834e-04
## Nausea=Nausea_y -0.3547892 8.062777e-05
## Vomiting=Vomit_y -0.3466915 2.510738e-06
## Mayo=Mayo_y -0.4644981 4.756234e-07
## Fever=Fever_y -0.5330918 2.600566e-19
## Diarrhae=Diarrhea_y -0.5380920 3.910776e-20
## Abdominals=Abdo_y -0.5671866 4.055640e-23
# Description of dimension 2
res.desc[[2]]
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Courgette 0.4464145 2.500166e-08
## Potato 0.3957543 2.690662e-07
## Vomiting 0.2511604 9.728027e-05
## Icecream 0.1409011 4.743927e-03
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Courgette=Courg_n 0.4176013 2.500166e-08
## Potato=Potato_y 0.4977523 2.690662e-07
## Vomiting=Vomit_y 0.1838104 9.728027e-05
## Icecream=Icecream_n 0.2597197 4.743927e-03
## Icecream=Icecream_y -0.2597197 4.743927e-03
## Vomiting=Vomit_n -0.1838104 9.728027e-05
## Potato=Potato_n -0.4977523 2.690662e-07
## Courgette=Courg_y -0.4176013 2.500166e-08
MCA(poison.active, ind.sup = NULL, quanti.sup = NULL, quali.sup=NULL,
graph = TRUE, axes = c(1,2))
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## **Results of the Multiple Correspondence Analysis (MCA)**
## The analysis was performed on 55 individuals, described by 11 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. of the categories"
## 4 "$var$cos2" "cos2 for the categories"
## 5 "$var$contrib" "contributions of the categories"
## 6 "$var$v.test" "v-test for the categories"
## 7 "$var$eta2" "coord. of variables"
## 8 "$ind" "results for the individuals"
## 9 "$ind$coord" "coord. for the individuals"
## 10 "$ind$cos2" "cos2 for the individuals"
## 11 "$ind$contrib" "contributions of the individuals"
## 12 "$call" "intermediate results"
## 13 "$call$marge.col" "weights of columns"
## 14 "$call$marge.li" "weights of rows"
res.mca <- MCA(poison, ind.sup =53:55,
quanti.sup = 1:2, quali.sup = 3:4, graph=FALSE)
# Supplementary qualitative variable categories
res.mca$quali.sup
## $coord
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Sick_n 1.41809140 0.0020394048 0.13199139 -0.016036841 -0.08354663
## Sick_y -0.63026284 -0.0009064021 -0.05866284 0.007127485 0.03713184
## F -0.03108147 0.1123143957 0.05033124 -0.055927173 -0.06832928
## M 0.03356798 -0.1212995474 -0.05435774 0.060401347 0.07379562
##
## $cos2
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Sick_n 0.893770319 1.848521e-06 0.007742990 0.0001143023 0.003102240
## Sick_y 0.893770319 1.848521e-06 0.007742990 0.0001143023 0.003102240
## F 0.001043342 1.362369e-02 0.002735892 0.0033780765 0.005042401
## M 0.001043342 1.362369e-02 0.002735892 0.0033780765 0.005042401
##
## $v.test
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Sick_n 6.7514655 0.009709509 0.6284047 -0.07635063 -0.3977615
## Sick_y -6.7514655 -0.009709509 -0.6284047 0.07635063 0.3977615
## F -0.2306739 0.833551410 0.3735378 -0.41506855 -0.5071119
## M 0.2306739 -0.833551410 -0.3735378 0.41506855 0.5071119
##
## $eta2
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Sick 0.893770319 1.848521e-06 0.007742990 0.0001143023 0.003102240
## Sex 0.001043342 1.362369e-02 0.002735892 0.0033780765 0.005042401
# Supplementary quantitative variables
res.mca$quanti
## $coord
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Age 0.003934896 -0.00741340 -0.26494536 0.20015501 0.02928483
## Time -0.838158507 -0.08330586 -0.08718851 -0.08421599 -0.02316931
# Supplementary individuals
res.mca$ind.sup
## $coord
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 53 1.0835684 0.5172478 0.5794063 0.5390903 0.4553650
## 54 -0.1249473 0.1417271 -0.1765234 -0.1526587 -0.2779565
## 55 -0.4315948 0.1270468 -0.2071580 -0.1186804 -0.1891760
##
## $cos2
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## 53 0.36304957 0.08272764 0.10380536 0.08986204 0.06411692
## 54 0.03157652 0.04062716 0.06302535 0.04713607 0.15626590
## 55 0.50232519 0.04352713 0.11572730 0.03798314 0.09650827
#Biplot of individuals and variable categories
fviz_mca_biplot(res.mca, repel = TRUE,
ggtheme = theme_minimal())
fviz_mca_var(res.mca, choice = "mca.cor",
repel = TRUE)
fviz_mca_var(res.mca, repel = TRUE,
ggtheme = theme_minimal())
fviz_mca_var(res.mca, choice = "quanti.sup",
ggtheme = theme_minimal())
fviz_mca_ind(res.mca,
label = "ind.sup", #Show the label of ind.sup only
ggtheme = theme_minimal())
# Visualize variable categories with cos2 >= 0.4
fviz_mca_var(res.mca, select.var = list(cos2 = 0.4))
# Top 10 active variables with the highest cos2
fviz_mca_var(res.mca, select.var = list(cos2 = 10))
# Select by names
name <- list(name = c("Fever_n", "Abdo_y", "Diarrhea_n", "Fever_Y", "Vomit_Y", "Vomit_n"))
fviz_mca_var(res.mca, select.var = name)
# top 5 contributing individuals and variable categories
fviz_mca_biplot(res.mca, select.ind = list(contrib = 5),
select.var = list(contrib = 5),
ggtheme = theme_minimal())
# Scree plot
scree.plot <- fviz_eig(res.mca)
# Biplot of row and colum variables
biplot.mca <- fviz_mca_biplot(res.mca)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:forecast':
##
## gghistogram
ggexport(plotlist = list(scree.plot, biplot.mca),
filename = "MCA.pdf")
## file saved to MCA.pdf
# Export into a TXT file
write.infile(res.mca, "mca.txt", sep = "\t")
# Export into a CSV file
write.infile(res.mca, "mca.csv", sep = ";")