Packages
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(ggplot2)
Import Data
data_cancer <- read_excel("Cancer vs AP_Xray_Stage_Grade_Age.xlsx", sheet = "Sheet1")
glimpse(data_cancer)
## Rows: 300
## Columns: 6
## $ AP <dbl> 128, 46, 165, 102, 42, 52, 112, 146, 157, 174, 135, 144, 136, 63…
## $ XRay <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0…
## $ Stage <dbl> 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1…
## $ Grade <dbl> 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0…
## $ Age <dbl> 54, 55, 53, 50, 61, 55, 52, 55, 59, 60, 66, 60, 53, 61, 66, 63, …
## $ Y <dbl> 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0…
head(data_cancer)
## # A tibble: 6 × 6
## AP XRay Stage Grade Age Y
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 128 0 1 1 54 1
## 2 46 0 1 1 55 0
## 3 165 0 0 1 53 1
## 4 102 0 1 1 50 1
## 5 42 0 1 0 61 0
## 6 52 0 1 0 55 0
Convert to Factors
data_cancer1 <- data_cancer
data_cancer1$XRay <- as.factor(data_cancer1$XRay)
data_cancer1$Stage <- as.factor(data_cancer1$Stage)
data_cancer1$Grade <- as.factor(data_cancer1$Grade)
Subset Data
set.seed(122)
r <- runif(nrow(data_cancer1))
data_cancer2 <- cbind(data_cancer1, r)
data_cancer2 <- data_cancer2[order(data_cancer2$r),]
data_cancer2 <- data_cancer2[1:200, c('AP', 'XRay', 'Stage', 'Grade', 'Age', 'Y')]
head(data_cancer2)
## AP XRay Stage Grade Age Y
## 8 146 0 1 0 55 1
## 24 83 0 0 0 56 0
## 187 80 0 1 1 50 0
## 27 119 0 1 1 69 1
## 281 84 0 1 0 55 0
## 214 44 1 1 1 66 1
nrow(data_cancer2)
## [1] 200
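The runif()/order() trick above is one way to draw a random subset of 200 rows; dplyr offers a more direct equivalent. A minimal sketch (not run above; slice_sample() needs dplyr >= 1.0.0, and the rows drawn will differ from the runif() approach even with the same seed):
# Draw 200 rows at random in one step; equivalent in spirit to ordering by
# runif() and keeping the first 200, but the RNG is consumed differently.
set.seed(122)
data_cancer2_alt <- data_cancer1 %>%
  slice_sample(n = 200)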
Data Exploration
summary(data_cancer2)
## AP XRay Stage Grade Age Y
## Min. : 41.00 0:100 0: 92 0: 97 Min. :49.00 Min. :0.000
## 1st Qu.: 73.75 1:100 1:108 1:103 1st Qu.:54.00 1st Qu.:0.000
## Median :109.50 Median :59.00 Median :1.000
## Mean :110.20 Mean :59.06 Mean :0.685
## 3rd Qu.:146.00 3rd Qu.:64.00 3rd Qu.:1.000
## Max. :190.00 Max. :70.00 Max. :1.000
library(ggplot2)
ggplot(data_cancer2, aes(x = factor(1), fill = XRay)) + geom_bar(width = 1) + scale_fill_brewer(palette="Set2") + coord_polar("y") + stat_count(geom = "text", aes(label = after_stat(count)), position=position_stack(vjust=0.5), colour="white")

ggplot(data_cancer2, aes(x = factor(1), fill = Stage)) + geom_bar(width = 1) + scale_fill_brewer(palette="Accent") + coord_polar("y") + stat_count(geom = "text", aes(label = after_stat(count)), position=position_stack(vjust=0.5), colour="white")

ggplot(data_cancer2, aes(x = factor(1), fill = Grade)) + geom_bar(width = 1) + coord_polar("y") + stat_count(geom = "text", aes(label = after_stat(count)), position=position_stack(vjust=0.5), colour="white")

ggplot(data_cancer2, aes(x=XRay, fill=as.factor(Y)))+geom_bar(position = "stack") + ylab("count") + geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5), size = 3, color = "white") + labs(fill = 'Y') + scale_fill_brewer(palette="Set2")

ggplot(data_cancer2, aes(x=Stage, fill=as.factor(Y)))+geom_bar(position = "stack") + ylab("count") + geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5), size = 3, color = "white") + labs(fill = 'Y') + scale_fill_brewer(palette="Accent")

ggplot(data_cancer2, aes(x=Grade, fill=as.factor(Y)))+geom_bar(position = "stack") + ylab("count")+ geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5), size = 3, color = "white") + labs(fill = 'Y')

get_box_stats <- function(y, upper_limit = max(y) * 1.15) {
  return(data.frame(
    y = 0.95 * upper_limit,
    label = paste(
      "Count =", length(y), "\n",
      "Mean =", round(mean(y), 2), "\n",
      "Median =", round(median(y), 2), "\n"
    )
  ))
}
ggplot(data_cancer2, aes(x = Y, y = AP, group = Y)) + geom_boxplot(fill = c("#999162", "#668291")) + stat_summary(fun.data = get_box_stats, geom = "text", hjust = 0.5, vjust = 0.8)

ggplot(data_cancer2, aes(x = Y, y = Age, group = Y)) + geom_boxplot(fill = c("#E6A700", "#76B4E9")) + stat_summary(fun.data = get_box_stats, geom = "text", hjust = 0.5, vjust = 0.9)

ggplot(data_cancer2, aes(x = AP, y = Y)) +
  geom_point(color = 'coral')

ggplot(data_cancer2, aes(x = Age, y = Y)) +
  geom_point(color = 'coral')

ggplot(data_cancer2, aes(x = factor(1), fill = as.factor(Y))) + geom_bar(width = 1) + scale_fill_brewer(palette="Dark2") + coord_polar("y") + labs(fill = 'Y') + stat_count(geom = "text", aes(label = after_stat(count)), position=position_stack(vjust=0.5), colour="white")

Split Data
set.seed(122)
log_sample <- sample(c(TRUE, FALSE), nrow(data_cancer2),
                     replace = TRUE, prob = c(0.8, 0.2))
log_train_data <- data_cancer2[log_sample, ]
log_test_data <- data_cancer2[!log_sample, ]
glimpse(log_train_data); glimpse(log_test_data)
## Rows: 158
## Columns: 6
## $ AP <dbl> 80, 119, 84, 44, 184, 51, 178, 132, 177, 125, 68, 89, 157, 119, …
## $ XRay <fct> 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1…
## $ Stage <fct> 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1…
## $ Grade <fct> 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1…
## $ Age <dbl> 50, 69, 55, 66, 51, 61, 62, 53, 50, 63, 65, 58, 59, 56, 50, 68, …
## $ Y <dbl> 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1…
## Rows: 42
## Columns: 6
## $ AP <dbl> 146, 83, 83, 122, 136, 151, 79, 59, 58, 135, 45, 62, 102, 58, 12…
## $ XRay <fct> 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0…
## $ Stage <fct> 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1…
## $ Grade <fct> 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0…
## $ Age <dbl> 55, 56, 56, 61, 54, 65, 52, 62, 50, 64, 63, 63, 66, 56, 68, 55, …
## $ Y <dbl> 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0…
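The logical-vector split above gives roughly 80/20 but does not control the class balance of Y in the two parts. A hedged alternative sketch, assuming the caret package (loaded later for confusionMatrix()) is available, stratifies the split on Y:
# Stratified 80/20 split: createDataPartition() keeps the proportion of
# Y = 0 and Y = 1 approximately equal in the training and test parts.
set.seed(122)
idx <- caret::createDataPartition(as.factor(data_cancer2$Y), p = 0.8, list = FALSE)
log_train_strat <- data_cancer2[idx, ]
log_test_strat <- data_cancer2[-idx, ]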
Exploration
library(DataExplorer)
plot_histogram(data_cancer2)

plot_bar(data_cancer2)

plot_boxplot(data_cancer2, by='Y')

Logistic Regression
model1_log <- glm(Y~., data = log_train_data, family = 'binomial')
summary(model1_log)
##
## Call:
## glm(formula = Y ~ ., family = "binomial", data = log_train_data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.307981 2.351900 -0.556 0.578117
## AP 0.044874 0.008511 5.272 1.35e-07 ***
## XRay1 2.755007 0.668962 4.118 3.82e-05 ***
## Stage1 2.389997 0.620304 3.853 0.000117 ***
## Grade1 1.429076 0.500288 2.857 0.004283 **
## Age -0.091964 0.040389 -2.277 0.022790 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 192.35 on 157 degrees of freedom
## Residual deviance: 119.19 on 152 degrees of freedom
## AIC: 131.19
##
## Number of Fisher Scoring iterations: 6
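The null and residual deviances in the summary can be combined into an overall assessment of the model. A minimal sketch using only quantities stored in the fitted glm object:
# McFadden's pseudo-R^2: 1 - residual deviance / null deviance
1 - model1_log$deviance / model1_log$null.deviance

# Likelihood-ratio test of the full model against the intercept-only model
with(model1_log, pchisq(null.deviance - deviance,
                        df.null - df.residual, lower.tail = FALSE))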
Odds Ratio
exp(model1_log$coefficients)
## (Intercept) AP XRay1 Stage1 Grade1 Age
## 0.2703654 1.0458961 15.7211456 10.9134577 4.1748408 0.9121383
library(ggplot2)
log_train_data %>%
  mutate(prob = ifelse(Y == "1", 1, 0)) %>%
  ggplot(aes(model1_log$fitted.values, prob)) +
  geom_point(alpha = .05) +
  geom_smooth(formula = y ~ x, method = "glm", method.args = list(family = "binomial")) +
  ggtitle("Logistic regression model fit") +
  xlab("Fit") +
  ylab("Probability of Y") +
  theme_minimal()

model1_log$coefficients
## (Intercept) AP XRay1 Stage1 Grade1 Age
## -1.30798105 0.04487401 2.75500666 2.38999668 1.42907622 -0.09196366
exp(model1_log$coefficients)
## (Intercept) AP XRay1 Stage1 Grade1 Age
## 0.2703654 1.0458961 15.7211456 10.9134577 4.1748408 0.9121383
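The point estimates above are easier to interpret with interval estimates attached. A sketch of Wald 95% confidence intervals for the odds ratios, built from the standard errors in the summary table:
# Odds ratios with Wald 95% confidence intervals; confint.default() uses
# the asymptotic standard errors reported by summary().
exp(cbind(OR = coef(model1_log), confint.default(model1_log)))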
Confusion Matrix (Training Data)
pred_train1 <- round(predict(model1_log, newdata = log_train_data, type = 'response'))
result_Y1 <- tibble(log_train_data$Y, pred_train1)
length(pred_train1)
## [1] 158
head(result_Y1, 10)
## # A tibble: 10 × 2
## `log_train_data$Y` pred_train1
## <dbl> <dbl>
## 1 0 1
## 2 1 1
## 3 0 0
## 4 1 1
## 5 1 1
## 6 0 0
## 7 1 1
## 8 1 1
## 9 1 1
## 10 0 1
library(caret)
## Loading required package: lattice
confusionMatrix(as.factor(log_train_data$Y), as.factor(pred_train1))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 30 17
## 1 14 97
##
## Accuracy : 0.8038
## 95% CI : (0.7332, 0.8626)
## No Information Rate : 0.7215
## P-Value [Acc > NIR] : 0.01129
##
## Kappa : 0.5218
##
## Mcnemar's Test P-Value : 0.71944
##
## Sensitivity : 0.6818
## Specificity : 0.8509
## Pos Pred Value : 0.6383
## Neg Pred Value : 0.8739
## Prevalence : 0.2785
## Detection Rate : 0.1899
## Detection Prevalence : 0.2975
## Balanced Accuracy : 0.7663
##
## 'Positive' Class : 0
##
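Note that caret's documented argument order is confusionMatrix(data = predictions, reference = observed); the call above passes the observed Y first, which swaps the roles of the rows and columns (and of sensitivity and specificity), although accuracy and kappa are unchanged. A sketch of the conventional ordering, also reporting metrics for the Y = 1 class:
# Predictions as `data`, observed labels as `reference`; positive = "1"
# makes sensitivity/PPV refer to the Y = 1 class instead of Y = 0.
confusionMatrix(data = as.factor(pred_train1),
                reference = as.factor(log_train_data$Y),
                positive = "1")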
Confusion Matrix (Test Data)
pred_test1 <- round(predict(model1_log, newdata = log_test_data, type = 'response'))
pred_test2 <- predict(model1_log, newdata = log_test_data, type = 'response')
result_Y2 <- data.frame(log_test_data$Y, pred_test1)
head(result_Y2, 10)
## log_test_data.Y pred_test1
## 8 1 1
## 24 0 0
## 233 0 1
## 239 1 1
## 104 0 0
## 154 1 1
## 53 1 1
## 107 0 0
## 58 0 0
## 65 1 1
Confusion Matrix
confusionMatrix(as.factor(log_test_data$Y), as.factor(pred_test1))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 12 4
## 1 1 25
##
## Accuracy : 0.881
## 95% CI : (0.7437, 0.9602)
## No Information Rate : 0.6905
## P-Value [Acc > NIR] : 0.003711
##
## Kappa : 0.7382
##
## Mcnemar's Test P-Value : 0.371093
##
## Sensitivity : 0.9231
## Specificity : 0.8621
## Pos Pred Value : 0.7500
## Neg Pred Value : 0.9615
## Prevalence : 0.3095
## Detection Rate : 0.2857
## Detection Prevalence : 0.3810
## Balanced Accuracy : 0.8926
##
## 'Positive' Class : 0
##
ROC
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
roc1 <- roc(log_train_data$Y, model1_log$fitted.values, plot = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

roc2 <- roc(log_test_data$Y, pred_test2, plot = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

plot(roc1, col = 'green', main = 'Logistic Regression ROC Curves', lty = 1)
plot(roc2, add = TRUE, lty = 2, col = 'red')
legend("right", c("Train", "Test"), lty = 1:2, col = c("green", "red"), bty = "n", inset = c(0, -0.15))

ggroc(list(Train = roc1, Test = roc2), aes = c('linetype')) + labs(linetype = 'Data')

roc1$auc; roc2$auc
## Area under the curve: 0.8823
## Area under the curve: 0.8942
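Rounding the fitted probabilities corresponds to a fixed 0.5 cutoff. If a data-driven cutoff is preferred, pROC can suggest one from the training ROC curve; a sketch using Youden's J:
# Threshold maximising Youden's J (sensitivity + specificity - 1) on roc1;
# "best" with best.method = "youden" is built into pROC::coords().
coords(roc1, x = "best", best.method = "youden",
       ret = c("threshold", "sensitivity", "specificity"))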
Artificial Neural Network - 1
Split Data
set.seed(122)
r <- runif(nrow(data_cancer))
data_cancer3 <- cbind(data_cancer, r)
data_cancer3 <- data_cancer3[order(data_cancer3$r),]
data_cancer3 <- data_cancer3[1:200, c('AP', 'XRay', 'Stage', 'Grade', 'Age', 'Y')]
head(data_cancer3)
## AP XRay Stage Grade Age Y
## 8 146 0 1 0 55 1
## 24 83 0 0 0 56 0
## 187 80 0 1 1 50 0
## 27 119 0 1 1 69 1
## 281 84 0 1 0 55 0
## 214 44 1 1 1 66 1
nrow(data_cancer3)
## [1] 200
nn_sample <- sample(c(TRUE, FALSE), nrow(data_cancer3),
                    replace = TRUE, prob = c(0.8, 0.2))
nn_train_data <- data_cancer3[nn_sample, ]
nn_test_data <- data_cancer3[!nn_sample, ]
glimpse(nn_train_data); glimpse(nn_test_data)
## Rows: 157
## Columns: 6
## $ AP <dbl> 146, 83, 80, 119, 44, 184, 51, 132, 83, 177, 125, 68, 89, 119, 1…
## $ XRay <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0…
## $ Stage <dbl> 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0…
## $ Grade <dbl> 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0…
## $ Age <dbl> 55, 56, 50, 69, 66, 51, 61, 53, 56, 50, 63, 65, 58, 56, 50, 64, …
## $ Y <dbl> 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0…
## Rows: 43
## Columns: 6
## $ AP <dbl> 84, 178, 157, 170, 89, 114, 99, 151, 179, 105, 135, 45, 62, 43, …
## $ XRay <dbl> 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1…
## $ Stage <dbl> 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1…
## $ Grade <dbl> 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1…
## $ Age <dbl> 55, 62, 59, 68, 64, 63, 50, 65, 69, 61, 64, 63, 63, 55, 57, 68, …
## $ Y <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1…
Model
library(neuralnet)
set.seed(122)
h <- c(3, 2)
nn_log_lin <- neuralnet(Y ~ ., data = nn_train_data, act.fct = 'logistic',
                        linear.output = T, hidden = h)
nn_tanh_lin <- neuralnet(Y ~ ., data = nn_train_data, act.fct = 'tanh',
                         linear.output = T, hidden = h)
nn_log_nonlin <- neuralnet(Y ~ ., data = nn_train_data, act.fct = 'logistic',
                           linear.output = F, hidden = h)
nn_tanh_nonlin <- neuralnet(Y ~ ., data = nn_train_data, act.fct = 'tanh',
                            linear.output = F, hidden = h)
# training error (first row of result.matrix) for each configuration
nn_error <- data.frame('NN Logistic Linear' = nn_log_lin$result.matrix[1],
                       'NN Logistic Nonlinear' = nn_log_nonlin$result.matrix[1],
                       'NN Tan-H Linear' = nn_tanh_lin$result.matrix[1],
                       'NN Tan-H Nonlinear' = nn_tanh_nonlin$result.matrix[1])
nn_error
## NN.Logistic.Linear NN.Logistic.Nonlinear NN.Tan.H.Linear NN.Tan.H.Nonlinear
## 1 9.287747 9.009976 13.81471 17.71338
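AP and Age are on much larger scales than the binary inputs, and neuralnet generally converges more reliably when predictors are rescaled. A sketch of min-max scaling (not applied in the models above), using the training ranges for both sets:
# Min-max scale AP and Age to [0, 1] using the *training* minima and maxima;
# the binary predictors and Y are already on that scale.
rng <- sapply(nn_train_data[c("AP", "Age")], range)
scale01 <- function(x, lo, hi) (x - lo) / (hi - lo)
nn_train_sc <- nn_train_data
nn_test_sc <- nn_test_data
for (v in c("AP", "Age")) {
  nn_train_sc[[v]] <- scale01(nn_train_data[[v]], rng[1, v], rng[2, v])
  nn_test_sc[[v]] <- scale01(nn_test_data[[v]], rng[1, v], rng[2, v])
}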
model1_nn <- nn_log_nonlin
plot(model1_nn)
library(nnet)
library(devtools)
## Loading required package: usethis
source_url('https://gist.githubusercontent.com/fawda123/7471137/raw/466c1474d0a505ff044412703516c34f1a4684a5/nnet_plot_update.r')
## ℹ SHA-1 hash of file is "74c80bd5ddbc17ab3ae5ece9c0ed9beb612e87ef"
plot.nnet(model1_nn)
## Loading required package: scales
## Loading required package: reshape
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'reshape'

pred_train2 <- neuralnet::compute(model1_nn, nn_train_data)
pred_test2 <- neuralnet::compute(model1_nn, nn_test_data)
predtrain_nn <- ifelse(pred_train2$net.result > 0.5, 1, 0)
predtest_nn <- ifelse(pred_test2$net.result > 0.5, 1, 0)
Confusion Matrix
confusionMatrix(as.factor(nn_train_data$Y), as.factor(predtrain_nn))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 39 15
## 1 6 97
##
## Accuracy : 0.8662
## 95% CI : (0.8028, 0.9153)
## No Information Rate : 0.7134
## P-Value [Acc > NIR] : 4.498e-06
##
## Kappa : 0.6914
##
## Mcnemar's Test P-Value : 0.08086
##
## Sensitivity : 0.8667
## Specificity : 0.8661
## Pos Pred Value : 0.7222
## Neg Pred Value : 0.9417
## Prevalence : 0.2866
## Detection Rate : 0.2484
## Detection Prevalence : 0.3439
## Balanced Accuracy : 0.8664
##
## 'Positive' Class : 0
##
confusionMatrix(as.factor(nn_test_data$Y), as.factor(predtest_nn))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 6 3
## 1 5 29
##
## Accuracy : 0.814
## 95% CI : (0.666, 0.9161)
## No Information Rate : 0.7442
## P-Value [Acc > NIR] : 0.1930
##
## Kappa : 0.4804
##
## Mcnemar's Test P-Value : 0.7237
##
## Sensitivity : 0.5455
## Specificity : 0.9062
## Pos Pred Value : 0.6667
## Neg Pred Value : 0.8529
## Prevalence : 0.2558
## Detection Rate : 0.1395
## Detection Prevalence : 0.2093
## Balanced Accuracy : 0.7259
##
## 'Positive' Class : 0
##
ROC Curve
roc3 <- roc(nn_train_data$Y, as.numeric(pred_train2$net.result), plot = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

roc4 <- roc(nn_test_data$Y, as.numeric(pred_test2$net.result), plot = T)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

ggroc(list(Train = roc3, Test = roc4), aes = c('linetype')) + labs(linetype = 'Data')

roc3$auc; roc4$auc
## Area under the curve: 0.831
## Area under the curve: 0.7418
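Collecting the four AUCs in one table makes the model comparison in the plots below easier to read; a minimal sketch using the roc objects already created:
# AUC summary for both models on training and test data
data.frame(Model = c("Logistic", "ANN"),
           AUC_Train = c(as.numeric(roc1$auc), as.numeric(roc3$auc)),
           AUC_Test = c(as.numeric(roc2$auc), as.numeric(roc4$auc)))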
ggroc(list(Logistic = roc1, ANN = roc3), aes = c('linetype')) + labs(linetype = 'Training data')

ggroc(list(Logistic = roc2, ANN = roc4), aes = c('linetype')) + labs(linetype = 'Test data')
