library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(outliers)
# Read 2019.csv and keep only the columns used in the analysis below.
# NOTE(review): setwd() with a hard-coded absolute path ties the script to one
# machine; it is kept here so the working-directory side effect is unchanged.
setwd("D:/R CSV")
df <- read.csv("2019.csv")
# Drop Overall.rank and Country.or.region; every remaining column is plotted
# and tested further down.
df <- subset(df, select = -c(Overall.rank, Country.or.region))

# One boxplot per remaining column on a 2 x 2 grid.
# seq_along(df) replaces the hard-coded 1:7 so the loop always matches the
# actual number of columns instead of failing (or skipping columns) when the
# column count differs from 7.
par(mfrow = c(2, 2))
for (i in seq_along(df)) {
  boxplot(df[[i]], main = names(df)[i])
}


# Apply Grubbs' single-outlier test to every column of df and print the
# combined result. The inner argument intentionally keeps the name `df`:
# that name is what shows up in the data.name row of the printed output.
sapply(
  X = df,
  FUN = function(df) {
    grubbs.test(df)
  }
)
## Score GDP.per.capita
## statistic numeric,2 numeric,2
## alternative "lowest value 2.853 is an outlier" "lowest value 0 is an outlier"
## p.value 1 1
## method "Grubbs test for one outlier" "Grubbs test for one outlier"
## data.name "df" "df"
## Social.support Healthy.life.expectancy
## statistic numeric,2 numeric,2
## alternative "lowest value 0 is an outlier" "lowest value 0 is an outlier"
## p.value 0.00262529 0.1873518
## method "Grubbs test for one outlier" "Grubbs test for one outlier"
## data.name "df" "df"
## Freedom.to.make.life.choices Generosity
## statistic numeric,2 numeric,2
## alternative "lowest value 0 is an outlier" "highest value 0.566 is an outlier"
## p.value 0.4379682 0.003153117
## method "Grubbs test for one outlier" "Grubbs test for one outlier"
## data.name "df" "df"
## Perceptions.of.corruption
## statistic numeric,2
## alternative "highest value 0.453 is an outlier"
## p.value 0.01703929
## method "Grubbs test for one outlier"
## data.name "df"
# Histogram of every column before outlier removal: 2 x 2 panels per page
# with reduced margins.
col_num <- names(df)
par(mfrow = c(2, 2), mar = c(3, 3, 1, 1))
for (i in col_num) {
  # Panel title is the column name; las = 2 draws the axis labels
  # perpendicular to the axis.
  hist(df[[i]], col = "green", las = 2, main = i)
}


# Fix the RNG seed: the gap statistic (simulated reference data) and k-means
# (random initial centers) are both stochastic, so without a seed every run of
# the script produces different plots and cluster assignments.
set.seed(123)

# Estimate a suitable number of clusters with the gap statistic.
fviz_nbclust(df, kmeans, method = "gap_stat") +
  labs(subtitle = "Gap statistic method")

# Partition the data into 5 clusters. nstart = 25 keeps the best of 25 random
# starts instead of depending on a single random initialisation.
km.res <- kmeans(df, centers = 5, nstart = 25)
fviz_cluster(km.res, data = df, ellipse.type = "norm")

dim(df)
## [1] 156 7
# Attribute: Social.support
boxplot.stats(df$Social.support)
## $stats
## [1] 0.5170 1.0555 1.2715 1.4530 1.6240
##
## $n
## [1] 156
##
## $conf
## [1] 1.221216 1.321784
##
## $out
## [1] 0.437 0.447 0.378 0.000
# Four removal passes: dropping rows shifts the quartiles, so boxplot.stats()
# can flag further values on the next pass.
for (i in seq_len(4)) {
  out_vals <- boxplot.stats(df$Social.support)$out
  # Logical indexing instead of df[-which(...), ]: when a pass flags nothing,
  # which() returns integer(0) and the negative index would drop EVERY row.
  df <- df[!(df$Social.support %in% out_vals), ]
}
# Attribute: Healthy.life.expectancy
boxplot.stats(df$Healthy.life.expectancy)
## $stats
## [1] 0.1680 0.5775 0.8035 0.8920 1.1410
##
## $n
## [1] 148
##
## $conf
## [1] 0.7626542 0.8443458
##
## $out
## [1] 0
# Remove the rows whose value is flagged as a boxplot outlier.
# Logical indexing instead of df[-which(...), ]: if nothing is flagged,
# which() returns integer(0) and the negative index would drop EVERY row.
out_vals <- boxplot.stats(df$Healthy.life.expectancy)$out
df <- df[!(df$Healthy.life.expectancy %in% out_vals), ]
# Attribute: Freedom.to.make.life.choices
boxplot.stats(df$Freedom.to.make.life.choices)
## $stats
## [1] 0.0660 0.3175 0.4300 0.5080 0.6310
##
## $n
## [1] 147
##
## $conf
## [1] 0.4051748 0.4548252
##
## $out
## [1] 0.026
# Remove the rows whose value is flagged as a boxplot outlier.
# Logical indexing instead of df[-which(...), ]: if nothing is flagged,
# which() returns integer(0) and the negative index would drop EVERY row.
out_vals <- boxplot.stats(df$Freedom.to.make.life.choices)$out
df <- df[!(df$Freedom.to.make.life.choices %in% out_vals), ]
# Attribute: Generosity
boxplot.stats(df$Generosity)
## $stats
## [1] 0.0000 0.1070 0.1775 0.2520 0.3750
##
## $n
## [1] 146
##
## $conf
## [1] 0.1585395 0.1964605
##
## $out
## [1] 0.498 0.566
# Remove the rows whose value is flagged as a boxplot outlier.
# Logical indexing instead of df[-which(...), ]: if nothing is flagged,
# which() returns integer(0) and the negative index would drop EVERY row.
out_vals <- boxplot.stats(df$Generosity)$out
df <- df[!(df$Generosity %in% out_vals), ]
# Attribute: Perceptions.of.corruption
boxplot.stats(df$Perceptions.of.corruption)
## $stats
## [1] 0.0000 0.0470 0.0835 0.1425 0.2780
##
## $n
## [1] 144
##
## $conf
## [1] 0.07092583 0.09607417
##
## $out
## [1] 0.393 0.410 0.341 0.298 0.343 0.373 0.380 0.308 0.290 0.316 0.310 0.453
## [13] 0.287 0.411
# Three removal passes: dropping rows shifts the quartiles, so boxplot.stats()
# can flag further values on the next pass.
for (i in seq_len(3)) {
  out_vals <- boxplot.stats(df$Perceptions.of.corruption)$out
  # Logical indexing instead of df[-which(...), ]: when a pass flags nothing,
  # which() returns integer(0) and the negative index would drop EVERY row.
  df <- df[!(df$Perceptions.of.corruption %in% out_vals), ]
}
# After outlier cleaning
dim(df)
## [1] 124 7
# Redraw the per-column histograms on the cleaned data: 2 x 2 panels per page
# with reduced margins.
col_num <- names(df)
par(mfrow = c(2, 2), mar = c(3, 3, 1, 1))
for (i in col_num) {
  # Panel title is the column name; las = 2 draws the axis labels
  # perpendicular to the axis.
  hist(df[[i]], col = "green", las = 2, main = i)
}


# Boxplots of every column after outlier removal, on a 2 x 2 grid.
# seq_along(df) replaces the hard-coded 1:7 so the loop always matches the
# actual number of columns.
par(mfrow = c(2, 2))
for (i in seq_along(df)) {
  boxplot(df[[i]], main = names(df)[i])
}


# Re-run Grubbs' single-outlier test on every column of the cleaned df and
# print the combined result. The inner argument intentionally keeps the name
# `df`: that name is what shows up in the data.name row of the printed output.
sapply(
  X = df,
  FUN = function(df) {
    grubbs.test(df)
  }
)
## Score
## statistic numeric,2
## alternative "highest value 7.494 is an outlier"
## p.value 0.856768
## method "Grubbs test for one outlier"
## data.name "df"
## GDP.per.capita
## statistic numeric,2
## alternative "lowest value 0.073 is an outlier"
## p.value 1
## method "Grubbs test for one outlier"
## data.name "df"
## Social.support
## statistic numeric,2
## alternative "lowest value 0.666 is an outlier"
## p.value 0.7477548
## method "Grubbs test for one outlier"
## data.name "df"
## Healthy.life.expectancy
## statistic numeric,2
## alternative "lowest value 0.168 is an outlier"
## p.value 0.4964903
## method "Grubbs test for one outlier"
## data.name "df"
## Freedom.to.make.life.choices
## statistic numeric,2
## alternative "lowest value 0.066 is an outlier"
## p.value 0.6943657
## method "Grubbs test for one outlier"
## data.name "df"
## Generosity
## statistic numeric,2
## alternative "highest value 0.375 is an outlier"
## p.value 0.582165
## method "Grubbs test for one outlier"
## data.name "df"
## Perceptions.of.corruption
## statistic numeric,2
## alternative "highest value 0.183 is an outlier"
## p.value 1
## method "Grubbs test for one outlier"
## data.name "df"
# Cluster the cleaned data into 5 groups.
# k-means starts from random centers, so fix the seed to make the result
# reproducible and use nstart = 25 to keep the best of 25 random starts
# instead of depending on a single initialisation.
set.seed(123)
km.res <- kmeans(df, centers = 5, nstart = 25)
fviz_cluster(km.res, data = df, ellipse.type = "norm")
