# Fix the RNG seed so that the random draws made later in the script are
# reproducible from run to run.
set.seed(10)

# Attach every package the analysis uses.
# library() is used rather than require(): require() merely returns FALSE
# (with a warning) when a package is not installed, so the script would carry
# on and fail much later with an unrelated "could not find function" error.
# library() stops immediately and names the missing package.
# suppressMessages() only hides start-up messages; errors still propagate.
suppressMessages({
  library(datasets)
  library(knitr)
  library(graphics)
  library(GGally)
  library(stats)
  library(caret)
  library(FactoMineR)
  library(factoextra)
  library(corrplot)
  library(plotly)
  library(dplyr)
})

# Show the first rows of the swiss data set as a formatted table.
kable(head(swiss))
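## |              | Fertility | Agriculture | Examination | Education | Catholic | Infant.Mortality |
## |:-------------|----------:|------------:|------------:|----------:|---------:|-----------------:|
## | Courtelary   |      80.2 |        17.0 |          15 |        12 |     9.96 |             22.2 |
## | Delemont     |      83.1 |        45.1 |           6 |         9 |    84.84 |             22.2 |
## | Franches-Mnt |      92.5 |        39.7 |           5 |         5 |    93.40 |             20.2 |
## | Moutier      |      85.8 |        36.5 |          12 |         7 |    33.77 |             20.3 |
## | Neuveville   |      76.9 |        43.5 |          17 |        15 |     5.16 |             20.6 |
## | Porrentruy   |      76.1 |        35.3 |           9 |         7 |    90.57 |             26.6 |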
# Pairwise correlation heat map of the swiss columns; label = TRUE asks
# ggcorr() to annotate the cells.
ggcorr(data = swiss, label = TRUE)

# Scatter-plot matrix of the same columns, for a visual check of the
# pairwise relationships.
graphics::pairs(swiss)

# Three nested linear models for Fertility, from fewest to most predictors.
# Each model's predictor set contains the previous one's, so the models can
# be compared sequentially with anova().
model.1var <- lm(Fertility ~ Education, data = swiss)
model.3vars <- lm(
  Fertility ~ Agriculture + Education + Infant.Mortality,
  data = swiss
)
# `.` expands to every column of swiss other than the response.
model.5vars <- lm(Fertility ~ ., data = swiss)

# Sequential comparison: each row tests the model against the one before it.
anova(model.1var, model.3vars, model.5vars)
## Analysis of Variance Table
##
## Model 1: Fertility ~ Education
## Model 2: Fertility ~ Agriculture + Education + Infant.Mortality
## Model 3: Fertility ~ Agriculture + Examination + Education + Catholic +
## Infant.Mortality
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 45 4015.2
## 2 43 3114.6 2 900.6 8.7705 0.0006747 ***
## 3 41 2105.0 2 1009.6 9.8319 0.0003251 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Principal component analysis over all columns of swiss, keeping five
# components; graph = FALSE stops PCA() from drawing its own plots.
swiss.pca <- PCA(
  swiss,
  ncp = 5,
  graph = FALSE
)

# Per-variable PCA results; the cos2 element is tabulated below.
swiss.pca.vars <- get_pca_var(swiss.pca)
kable(head(swiss.pca.vars[["cos2"]]))
## |                  |     Dim.1 |     Dim.2 |     Dim.3 |     Dim.4 |     Dim.5 |
## |:-----------------|----------:|----------:|----------:|----------:|----------:|
## | Fertility        | 0.6682298 | 0.1232303 | 0.0255934 | 0.1258945 | 0.0300168 |
## | Agriculture      | 0.5758205 | 0.2012318 | 0.0012463 | 0.1814285 | 0.0287552 |
## | Examination      | 0.8313848 | 0.0185723 | 0.0070557 | 0.0013019 | 0.1356201 |
## | Education        | 0.6604276 | 0.0380956 | 0.2402486 | 0.0041630 | 0.0010440 |
## | Catholic         | 0.3922192 | 0.0252859 | 0.5517382 | 0.0043431 | 0.0068625 |
## | Infant.Mortality | 0.0716750 | 0.7818923 | 0.0217277 | 0.1217977 | 0.0022351 |
# Plot the variable-by-dimension cos2 matrix. is.corr = FALSE tells
# corrplot() that the input is not a correlation matrix.
corrplot(swiss.pca.vars[["cos2"]], is.corr = FALSE)

# Eigenvalue table of the PCA.
# NOTE(review): swiss.pca.eig.val is not used in the visible part of the
# script; kept in case later code relies on it.
swiss.pca.eig.val <- get_eigenvalue(swiss.pca)

# Scree plot with value labels; the y axis is fixed to 0-70.
fviz_eig(
  swiss.pca,
  addlabels = TRUE,
  ylim = c(0, 70)
)

# Attach each row's coordinates on the principal components to the
# original swiss columns so both can be plotted from one data frame.
swiss.pca.ind <- get_pca_ind(swiss.pca)
swiss.data.pca <- cbind(
  data.frame(swiss.pca.ind[["coord"]]),
  swiss
)
# 3-D scatter of three original variables, coloured by Fertility.
plot_ly(
  data = swiss,
  type = "scatter3d",
  mode = "markers",
  x = ~Examination,
  y = ~Infant.Mortality,
  z = ~Catholic,
  color = ~Fertility
)

# The same observations placed on the first three principal-component
# coordinates, again coloured by Fertility, for comparison with the plot above.
plot_ly(
  data = swiss.data.pca,
  type = "scatter3d",
  mode = "markers",
  x = ~Dim.1,
  y = ~Dim.2,
  z = ~Dim.3,
  color = ~Fertility
)
# Fit Fertility on three predictors through caret. preProcess = "pca" asks
# train() to apply a PCA pre-processing step to the predictors before the
# "glm" method is fitted.
swiss.model.pca.glm <- train(
  Fertility ~ Examination + Infant.Mortality + Catholic,
  data = swiss,
  preProcess = "pca",
  method = "glm"
)
# Synthetic data: 100 rows with six columns, each an independent draw from
# N(mean = 50, sd = 10). The column names are the variable names used in the
# models in this script. The columns are generated one after another in the
# order listed, so the random-number stream is consumed in a fixed order.
# NOTE(review): N(50, 10) may fall outside the observed range of some swiss
# predictors, in which case predictions on this cloud are extrapolations.
# Confirm that is intended.
cloud.columns <- c(
  "Fertility", "Agriculture", "Examination",
  "Education", "Catholic", "Infant.Mortality"
)
point.cloud <- data.frame(
  lapply(
    setNames(cloud.columns, cloud.columns),
    function(column) rnorm(100, mean = 50, sd = 10)
  )
)
# Score every synthetic row with the fitted caret model and store the result
# next to the inputs as Fertility.Predicted.
point.cloud <- mutate(
  point.cloud,
  Fertility.Predicted = predict(swiss.model.pca.glm, newdata = point.cloud)
)
# 3-D scatter of the synthetic rows over the three model predictors,
# coloured by the model's predicted Fertility.
plot_ly(
  data = point.cloud,
  type = "scatter3d",
  mode = "markers",
  x = ~Examination,
  y = ~Infant.Mortality,
  z = ~Catholic,
  color = ~Fertility.Predicted
)