setwd('D:/Rakha/Skripsi')
#####
# C Berjalan
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(missForest)
## Warning: package 'missForest' was built under R version 4.2.2
##
## Attaching package: 'missForest'
## The following object is masked from 'package:VIM':
##
## nrmse
load("Rdata Fix/df_simulasi.Rdata")
df <- df_simulasi[,-7]
set.seed(23)
df_2prsn <- prodNA(df, noNA = 0.02)
best_k <- function(data_mis, data, k_seq){
library(VIM)
library(missForest)
listerr <- c() #Create a list in which to save BIC.
for(i in 1:length(k_seq)){
df_imp <- VIM::kNN(data_mis, k = k_seq[i])[,1:6]
err <- mixError(df_imp, data_mis, data)
listerr[i] <- err # save rmse into the vector
}
return(listerr) #Return the list of rmse.
}
k_seq <- seq(1,23,2)
k_terbaik <- best_k(df_2prsn, df, k_seq = k_seq)
plot(k_terbaik)

#####
# Proporsi NA Berjalan
library(missForest)
library(VIM)
library(flexmix)
## Loading required package: lattice
get_BIC <- function(proporsi_na, data){
listBIC <- c() #Create a list in which to save BIC.
for(i in 1:length(proporsi_na)){ #Loop through the numbers missing value proportion
set.seed(23)
df <- prodNA(data, noNA = proporsi_na[i])
df_imp <- VIM::kNN(df, k = 5)[,1:6]
set.seed(23)
model <- flexmix(y ~ x1 + x2 + x3 + x4 + x5, data = df_imp, k = 2,
control = list(iter = 200, tol = 0.00001, verbose = 0))
BIC_model <- BIC(model)
listBIC[i] <- BIC_model # save BIC into the list
}
return(listBIC) #Return the list of BIC
}
na_prop <- seq(0.005, 0.1, by = 0.005)
BIC_all <- get_BIC(proporsi_na = na_prop, data = df)
plot(BIC_all)

# Import Library
library(flexmix)
library(readxl)
#Import Dataset
df_raw <- read_excel("Dataset.xls", sheet = "df", na = c("Tr", "-"))
df_raw <- df_raw[,-c(1,8,9)]
#Exploratory Data Analysis
summary(df_raw)
## Y X1 X2 X3
## Min. : 0.00 Min. :24.20 Min. :1.006 Min. : 4.000
## 1st Qu.: 0.00 1st Qu.:27.40 1st Qu.:1.008 1st Qu.: 7.000
## Median : 1.00 Median :27.80 Median :1.009 Median : 8.000
## Mean : 10.71 Mean :27.76 Mean :1.009 Mean : 8.457
## 3rd Qu.: 11.30 3rd Qu.:28.20 3rd Qu.:1.010 3rd Qu.: 9.000
## Max. :257.00 Max. :30.10 Max. :1.012 Max. :29.000
## NA's :44
## X4 X5
## Min. : 0.000 Min. :0.5000
## 1st Qu.: 3.600 1st Qu.:0.7500
## Median : 5.800 Median :0.8750
## Mean : 5.552 Mean :0.8068
## 3rd Qu.: 7.600 3rd Qu.:0.8750
## Max. :10.700 Max. :1.0000
## NA's :12
#Impute Missing Value
library(DMwR2)
## Warning: package 'DMwR2' was built under R version 4.2.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'DMwR2'
## The following object is masked from 'package:VIM':
##
## kNN
load("Last/KNN Imputation.Rdata")
df <- knni(as.data.frame(df_raw), k = 19, scale = F)
df <- df[,-c(7,8)]
summary(df)
## Y X1 X2 X3
## Min. : 0.00 Min. :24.20 Min. :1.006 Min. : 4.000
## 1st Qu.: 0.00 1st Qu.:27.40 1st Qu.:1.008 1st Qu.: 7.000
## Median : 2.00 Median :27.80 Median :1.009 Median : 8.000
## Mean : 10.63 Mean :27.76 Mean :1.009 Mean : 8.457
## 3rd Qu.: 12.28 3rd Qu.:28.20 3rd Qu.:1.010 3rd Qu.: 9.000
## Max. :257.00 Max. :30.10 Max. :1.012 Max. :29.000
## X4 X5
## Min. : 0.000 Min. :0.5000
## 1st Qu.: 3.700 1st Qu.:0.7500
## Median : 5.900 Median :0.8750
## Mean : 5.577 Mean :0.8068
## 3rd Qu.: 7.600 3rd Qu.:0.8750
## Max. :10.700 Max. :1.0000
#Modelling
kontrol <- list(tolerance = 10^-5, iter.max = 40)
clr_model_step <- stepFlexmix(Y ~ X1 + X2 + X3 + X4 + X5, data = df, k = 1:6, nrep = 10, verbose = TRUE, control = kontrol)
## 1 : * * * * * * * * * *
## 2 : * * * * * * * * * *
## 3 : *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 33 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 33 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 34 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 33 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 33 Log-likelihood: NaN
##
## 4 : *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 29 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 31 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 30 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 31 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 28 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 33 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 30 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 28 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 28 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 29 Log-likelihood: NaN
##
## 5 : *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 25 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 23 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 25 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 25 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 26 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 26 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 25 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
##
## 6 : *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 20 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 23 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 21 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 22 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 24 Log-likelihood: NaN
## *Error in FLXfit(model = model, concomitant = concomitant, control = control, :
## 26 Log-likelihood: NaN
set.seed(23)
clr_model <- flexmix(Y ~ X1 + X2 + X3 + X4 + X5, data = df, k = 2)
clr_model@components$Comp.1
## [[1]]
## $coef
## (Intercept) X1 X2 X3 X4
## 15.275147861 -0.109014031 -12.922446861 -0.001235282 0.016766274
## X5
## 1.161101017
##
## $sigma
## [1] 0.3222568
clr_model@components$Comp.2
## [[1]]
## $coef
## (Intercept) X1 X2 X3 X4 X5
## 19.9903220 -5.0393227 61.3241855 1.8023989 -0.0250367 71.8848454
##
## $sigma
## [1] 26.392
summary(clr_model)
##
## Call:
## flexmix(formula = Y ~ X1 + X2 + X3 + X4 + X5, data = df, k = 2)
##
## prior size post>0 ratio
## Comp.1 0.438 164 171 0.959
## Comp.2 0.562 197 361 0.546
##
## 'log Lik.' -1219.73 (df=15)
## AIC: 2469.461 BIC: 2527.794
parameters(clr_model, component = 1)
## Comp.1
## coef.(Intercept) 15.275147861
## coef.X1 -0.109014031
## coef.X2 -12.922446861
## coef.X3 -0.001235282
## coef.X4 0.016766274
## coef.X5 1.161101017
## sigma 0.322256805
parameters(clr_model, component = 2)
## Comp.2
## coef.(Intercept) 19.9903220
## coef.X1 -5.0393227
## coef.X2 61.3241855
## coef.X3 1.8023989
## coef.X4 -0.0250367
## coef.X5 71.8848454
## sigma 26.3920010
#Uji Signifikansi Parameter
rmodel <- refit(clr_model)
summary(rmodel)
## $Comp.1
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 15.2722890 24.1262058 0.6330 0.5267228
## X1 -0.1089509 0.0457321 -2.3824 0.0172014 *
## X2 -12.9216838 23.6400780 -0.5466 0.5846531
## X3 -0.0012378 0.0099958 -0.1238 0.9014443
## X4 0.0169297 0.0143471 1.1800 0.2379964
## X5 1.1582056 0.3198297 3.6213 0.0002931 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Comp.2
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19.990337 1606.451393 0.0124 0.990072
## X1 -5.039498 2.682002 -1.8790 0.060244 .
## X2 61.324203 1580.512026 0.0388 0.969050
## X3 1.801952 0.689642 2.6129 0.008978 **
## X4 -0.027047 0.960487 -0.0282 0.977534
## X5 71.885330 29.253023 2.4574 0.013996 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Clustered_dataset <- cbind(df, clr_model@cluster)
colnames(Clustered_dataset) <- c("Y", "X1", "X2", "X3", "X4", "X5", "Cluster")
Gerombol1 <- Clustered_dataset[Clustered_dataset$Cluster == 1,]
Gerombol2 <- Clustered_dataset[Clustered_dataset$Cluster == 2,]
summary(Gerombol1[,-7])
## Y X1 X2 X3
## Min. :0.0000 Min. :26.10 Min. :1.006 Min. : 4.000
## 1st Qu.:0.0000 1st Qu.:27.60 1st Qu.:1.008 1st Qu.: 7.000
## Median :0.0000 Median :27.90 Median :1.009 Median : 8.000
## Mean :0.1847 Mean :27.92 Mean :1.009 Mean : 8.372
## 3rd Qu.:0.0000 3rd Qu.:28.30 3rd Qu.:1.010 3rd Qu.: 9.000
## Max. :1.0000 Max. :30.10 Max. :1.012 Max. :29.000
## X4 X5
## Min. : 0.000 Min. :0.5000
## 1st Qu.: 5.100 1st Qu.:0.7500
## Median : 7.000 Median :0.7500
## Mean : 6.524 Mean :0.7553
## 3rd Qu.: 8.525 3rd Qu.:0.8750
## Max. :10.700 Max. :0.8750
round(diag(var(Gerombol1)), 6)
## Y X1 X2 X3 X4 X5 Cluster
## 0.129234 0.404278 0.000001 7.462031 6.857422 0.010612 0.000000
summary(Gerombol2[,-7])
## Y X1 X2 X3
## Min. : 1.307 Min. :24.20 Min. :1.006 Min. : 4.000
## 1st Qu.: 4.000 1st Qu.:27.20 1st Qu.:1.008 1st Qu.: 7.000
## Median : 10.988 Median :27.70 Median :1.009 Median : 8.000
## Mean : 19.319 Mean :27.63 Mean :1.009 Mean : 8.528
## 3rd Qu.: 22.000 3rd Qu.:28.10 3rd Qu.:1.010 3rd Qu.:10.000
## Max. :257.000 Max. :29.90 Max. :1.012 Max. :21.000
## X4 X5
## Min. : 0.000 Min. :0.6250
## 1st Qu.: 2.800 1st Qu.:0.7500
## Median : 5.100 Median :0.8750
## Mean : 4.789 Mean :0.8496
## 3rd Qu.: 6.700 3rd Qu.:0.8750
## Max. :10.500 Max. :1.0000
round(diag(var(Gerombol2)), 6)
## Y X1 X2 X3 X4 X5 Cluster
## 785.270767 0.640014 0.000001 7.454574 6.575821 0.005730 0.000000
boxplot(Y~ as.factor(Cluster), ylab = "Jumlah Curah Hujan Harian", xlab = "Gerombol",
data = Clustered_dataset, outline = F)

#writexl::write_xlsx(Clustered_dataset, "Last/Hasil Gerombol.xlsx")