# Fix the RNG seed so the random draws made later in this script
# (e.g. the rnorm() calls) are reproducible from run to run.
set.seed(10)

# Attach every package the script uses. library() stops with an error as soon
# as a package is missing, whereas require() only returns FALSE with a warning
# and lets the script fail later at the first call into the missing package.
# suppressMessages() hides start-up messages only; errors still surface.
suppressMessages({
  library(datasets)
  library(knitr)
  library(graphics)
  library(GGally)
  library(stats)
  library(caret)
  library(FactoMineR)
  library(factoextra)
  library(corrplot)
  library(plotly)
  library(dplyr)
})

# Render the first rows of the swiss dataset as a table.
swiss %>%
  head() %>%
  kable()
##              Fertility Agriculture Examination Education Catholic Infant.Mortality
## Courtelary        80.2        17.0          15        12     9.96             22.2
## Delemont          83.1        45.1           6         9    84.84             22.2
## Franches-Mnt      92.5        39.7           5         5    93.40             20.2
## Moutier           85.8        36.5          12         7    33.77             20.3
## Neuveville        76.9        43.5          17        15     5.16             20.6
## Porrentruy        76.1        35.3           9         7    90.57             26.6
# Correlation plot across the swiss columns, with the coefficients
# printed as labels.
ggcorr(data = swiss, label = TRUE)

# Scatterplot matrix of the swiss columns.
pairs(x = swiss)

# Three nested linear models for Fertility: one predictor, three predictors,
# and every other column of swiss (the `.` shorthand).
model.1var <- lm(formula = Fertility ~ Education, data = swiss)

model.3vars <- lm(
  formula = Fertility ~ Agriculture + Education + Infant.Mortality,
  data = swiss
)

model.5vars <- lm(formula = Fertility ~ ., data = swiss)

# Sequential comparison: each model is tested against the one before it.
anova(model.1var, model.3vars, model.5vars)
## Analysis of Variance Table
## 
## Model 1: Fertility ~ Education
## Model 2: Fertility ~ Agriculture + Education + Infant.Mortality
## Model 3: Fertility ~ Agriculture + Examination + Education + Catholic + 
##     Infant.Mortality
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     45 4015.2                                  
## 2     43 3114.6  2     900.6 8.7705 0.0006747 ***
## 3     41 2105.0  2    1009.6 9.8319 0.0003251 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# PCA over all swiss columns, keeping 5 components; graph = FALSE turns off
# the plots PCA() would otherwise draw.
swiss.pca <- PCA(X = swiss, ncp = 5, graph = FALSE)

# Variable-level results extracted from the PCA fit.
swiss.pca.vars <- get_pca_var(swiss.pca)

# Tabulate the first rows of the variables' cos2 matrix.
swiss.pca.vars[["cos2"]] %>%
  head() %>%
  kable()
##                      Dim.1     Dim.2     Dim.3     Dim.4     Dim.5
## Fertility        0.6682298 0.1232303 0.0255934 0.1258945 0.0300168
## Agriculture      0.5758205 0.2012318 0.0012463 0.1814285 0.0287552
## Examination      0.8313848 0.0185723 0.0070557 0.0013019 0.1356201
## Education        0.6604276 0.0380956 0.2402486 0.0041630 0.0010440
## Catholic         0.3922192 0.0252859 0.5517382 0.0043431 0.0068625
## Infant.Mortality 0.0716750 0.7818923 0.0217277 0.1217977 0.0022351
# Plot the variables' cos2 matrix as a general matrix (is.corr = FALSE).
corrplot(corr = swiss.pca.vars[["cos2"]], is.corr = FALSE)

# Eigenvalues of the PCA fit, kept for later inspection.
swiss.pca.eig.val <- get_eigenvalue(swiss.pca)

# Eigenvalue plot of the PCA dimensions with value labels; the y axis is
# limited to the range 0-70.
fviz_eig(swiss.pca, ylim = c(0, 70), addlabels = TRUE)

# Individual-level (row-level) results extracted from the PCA fit.
swiss.pca.ind <- get_pca_ind(swiss.pca)

# Place the individuals' PCA coordinates side by side with the original
# swiss columns.
swiss.data.pca <- cbind(
  data.frame(swiss.pca.ind[["coord"]]),
  swiss
)
# 3D marker scatter of three raw swiss variables, coloured by Fertility.
swiss %>%
  plot_ly(
    x = ~Examination, y = ~Infant.Mortality, z = ~Catholic,
    color = ~Fertility,
    type = "scatter3d", mode = "markers"
  )

# The same kind of plot on the first three PCA coordinates, still coloured
# by Fertility.
swiss.data.pca %>%
  plot_ly(
    x = ~Dim.1, y = ~Dim.2, z = ~Dim.3,
    color = ~Fertility,
    type = "scatter3d", mode = "markers"
  )
# Fit Fertility on three predictors through train() with method "glm";
# preProcess = "pca" requests PCA pre-processing of the predictors.
swiss.model.pca.glm <- train(
  Fertility ~ Examination + Infant.Mortality + Catholic,
  data = swiss, method = "glm", preProcess = "pca"
)

# Synthetic data: 100 rows, one column per swiss variable name, each drawn
# independently with rnorm(mean = 50, sd = 10). Columns are generated in the
# listed order, so the RNG stream is consumed column by column.
# NOTE(review): the swiss rows shown earlier in this file have
# Infant.Mortality around 20-27, so mean = 50 may lie outside the observed
# range for some variables -- confirm this is intended.
point.cloud <- as.data.frame(
  lapply(
    setNames(nm = c(
      "Fertility", "Agriculture", "Examination",
      "Education", "Catholic", "Infant.Mortality"
    )),
    function(column) rnorm(100, mean = 50, sd = 10)
  )
)

# Add the fitted model's predictions for the synthetic points as a new
# Fertility.Predicted column.
point.cloud <- mutate(
  point.cloud,
  Fertility.Predicted = predict(swiss.model.pca.glm, newdata = point.cloud)
)

# 3D marker scatter of the synthetic points over the three model predictors,
# coloured by the predicted Fertility.
point.cloud %>%
  plot_ly(
    x = ~Examination, y = ~Infant.Mortality, z = ~Catholic,
    color = ~Fertility.Predicted,
    type = "scatter3d", mode = "markers"
  )